# Use 2.1 for orbs
version: 2.1

# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
gpu: &gpu
  environment:
    CUDA_VERSION: "11.2"
  machine:
    image: ubuntu-2004-cuda-11.2:202103-01
  resource_class: gpu.nvidia.medium.multi

# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
cache_key: &cache_key cache-key-{{ .Environment.CIRCLE_JOB }}-{{ checksum ".circleci/config.yml" }}-{{ checksum "setup.py"}}

install_dep_pt1_10: &install_dep_pt1_10
  - run:
      name: Install PyTorch Dependencies
      command: |
        source activate fairseq
        pip install --upgrade setuptools
        pip install torch==1.10.1+cu111 torchaudio==0.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html
        python -c 'import torch; print("Torch version:", torch.__version__)'

install_dep_pt1_12: &install_dep_pt1_12
  - run:
      name: Install PyTorch Dependencies
      command: |
        source activate fairseq
        pip install --upgrade setuptools
        pip install torch==1.12.1+cu116 torchaudio==0.12.1+cu116 -f https://download.pytorch.org/whl/torch_stable.html
        python -c 'import torch; print("Torch version:", torch.__version__)'

install_repo: &install_repo
  - run:
      name: Install Repository
      command: |
        source activate fairseq
        python -m pip install fairscale
        python -m pip install -e '.[dev,docs]'
        python -c 'import torch; print("Torch version:", torch.__version__)'

run_unittests: &run_unittests
  - run:
      name: Run Unit Tests
      command: |
        source activate fairseq
        pytest tests/gpu/test_binaries_gpu.py

check_nvidia_driver: &check_nvidia_driver
  - run:
      name: Check NVIDIA Driver
      working_directory: ~/
      command: |
        pyenv versions
        nvidia-smi

create_conda_env: &create_conda_env
  - run:
      name: Install and Create Conda Environment
      command: |
        curl -o ~/miniconda.sh -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
        chmod +x ~/miniconda.sh
        bash ~/miniconda.sh -b -p $HOME/miniconda
        rm ~/miniconda.sh
        echo 'export PATH=$HOME/miniconda/bin:$PATH' >> $BASH_ENV
        source $BASH_ENV
        if [ ! -d ~/miniconda/envs/fairseq ]
        then
          conda create -y -n fairseq python=3.8
        fi
        source activate fairseq
        python --version
        pip install --upgrade pip

# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------
jobs:
  gpu_tests_pt1_10:
    <<: *gpu
    working_directory: ~/fairseq-py
    steps:
      - checkout
      - <<: *check_nvidia_driver
      - <<: *create_conda_env
      - restore_cache:
          key: *cache_key
      - <<: *install_dep_pt1_10
      - save_cache:
          paths:
            - ~/miniconda/
          key: *cache_key
      - <<: *install_repo
      - <<: *run_unittests

  gpu_tests_pt1_12:
    <<: *gpu
    working_directory: ~/fairseq-py
    steps:
      - checkout
      - <<: *check_nvidia_driver
      - <<: *create_conda_env
      - restore_cache:
          key: *cache_key
      - <<: *install_dep_pt1_12
      - save_cache:
          paths:
            - ~/miniconda/
          key: *cache_key
      - <<: *install_repo
      - <<: *run_unittests

workflows:
  version: 2
  build:
    jobs:
      - gpu_tests_pt1_12
      - gpu_tests_pt1_10
# Setting up CODEOWNERS for the UST-related codebase
# Documentation for open sourced models relevant to UST
examples/speech_to_text @kahne @sravyapopuri388 @jmp84
examples/speech_to_speech @an918tw @sravyapopuri388 @jmp84
examples/speech_synthesis @kahne @jmp84
examples/simultaneous_translation @kahne @jmp84
examples/speech_text_joint_to_text @yuntang @jmp84
# Speech related models relevant to UST
fairseq/models/speech_to_speech @sravyapopuri388 @jmp84
fairseq/models/speech_to_text @kahne @sravyapopuri388 @jmp84
fairseq/models/text_to_speech @kahne @jmp84
# CONFORMER IMPLEMENTATION
fairseq/modules/conformer_layer.py @sravyapopuri388 @jmp84
fairseq/modules/espnet_multihead_attention.py @sravyapopuri388 @jmp84
fairseq/modules/rotary_positional_embedding.py @sravyapopuri388 @jmp84
fairseq/modules/positional_encoding.py @sravyapopuri388 @jmp84
# Machine Translation/NLLB
fairseq/tasks/translation.py @gwenzek
## 👉 [Please follow one of these issue templates](https://github.com/pytorch/fairseq/issues/new/choose) 👈
Note: to keep the backlog clean and actionable, issues may be immediately closed if they do not follow one of the above issue templates.
---
name: 🐛 Bug Report
about: Submit a bug report to help us improve
labels: 'bug, needs triage'
---
## 🐛 Bug
<!-- A clear and concise description of what the bug is. -->
### To Reproduce
Steps to reproduce the behavior (**always include the command you ran**):
1. Run cmd '....'
2. See error
<!-- If you have a code sample, error messages, stack traces, please provide it here as well -->
#### Code sample
<!-- Ideally attach a minimal code sample to reproduce the described issue.
Minimal means having the shortest code but still preserving the bug. -->
### Expected behavior
<!-- A clear and concise description of what you expected to happen. -->
### Environment
- fairseq Version (e.g., 1.0 or main):
- PyTorch Version (e.g., 1.0):
- OS (e.g., Linux):
- How you installed fairseq (`pip`, source):
- Build command you used (if compiling from source):
- Python version:
- CUDA/cuDNN version:
- GPU models and configuration:
- Any other relevant information:
### Additional context
<!-- Add any other context about the problem here. -->
---
name: 📚 Documentation/Typos
about: Report an issue related to documentation or a typo
labels: 'documentation, needs triage'
---
## 📚 Documentation
For typos and doc fixes, please go ahead and:
1. Create an issue.
2. Fix the typo.
3. Submit a PR.
Thanks!
---
name: 🚀 Feature Request
about: Submit a proposal/request for a new feature
labels: 'enhancement, help wanted, needs triage'
---
## 🚀 Feature Request
<!-- A clear and concise description of the feature proposal -->
### Motivation
<!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
### Pitch
<!-- A clear and concise description of what you want to happen. -->
### Alternatives
<!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->
### Additional context
<!-- Add any other context or screenshots about the feature request here. -->
---
name: ❓ Questions/Help
about: If you have questions, please first search existing issues and docs
labels: 'question, needs triage'
---
## ❓ Questions and Help
### Before asking:
1. Search the issues.
2. Search the docs.
<!-- If you still can't find what you need: -->
#### What is your question?
#### Code
<!-- Please paste a code snippet if your question requires it! -->
#### What have you tried?
#### What's your environment?
- fairseq Version (e.g., 1.0 or main):
- PyTorch Version (e.g., 1.0):
- OS (e.g., Linux):
- How you installed fairseq (`pip`, source):
- Build command you used (if compiling from source):
- Python version:
- CUDA/cuDNN version:
- GPU models and configuration:
- Any other relevant information:
# Before submitting
- [ ] Was this discussed/approved via a GitHub issue? (not needed for typos or doc improvements)
- [ ] Did you read the [contributor guideline](https://github.com/pytorch/fairseq/blob/main/CONTRIBUTING.md)?
- [ ] Did you make sure to update the docs?
- [ ] Did you write any new necessary tests?
## What does this PR do?
Fixes # (issue).
## PR review
Anyone in the community is free to review the PR once the tests have passed.
If we didn't discuss your PR in a GitHub issue, there's a high chance it will not be merged.
## Did you have fun?
Make sure you had fun coding 🙃
# Configuration for probot-stale - https://github.com/probot/stale
# Mostly copied from github.com/facebook/react/blob/master/.github/stale.yml

# Number of days of inactivity before an issue becomes stale
daysUntilStale: 90
# Number of days of inactivity before a stale issue is closed
daysUntilClose: 7
# Issues with these labels will never be considered stale
exemptLabels:
  - bug
# Label to use when marking an issue as stale
staleLabel: stale
issues:
  # Comment to post when marking an issue as stale.
  markComment: >
    This issue has been automatically marked as stale.
    **If this issue is still affecting you, please leave any comment** (for example, "bump"), and we'll keep it open.
    We are sorry that we haven't been able to prioritize it yet. If you have any additional information, please include it with your comment!
  # Comment to post when closing a stale issue.
  closeComment: >
    Closing this issue after a prolonged period of inactivity. If this issue is still present in the latest release, please create a new issue with up-to-date information. Thank you!
pulls:
  # Comment to post when marking a pull request as stale.
  markComment: >
    This pull request has been automatically marked as stale.
    **If this pull request is still relevant, please leave any comment** (for example, "bump"), and we'll keep it open.
    We are sorry that we haven't been able to prioritize reviewing it yet. Your contribution is very much appreciated.
  # Comment to post when closing a stale pull request.
  closeComment: >
    Closing this pull request after a prolonged period of inactivity. If the change is still relevant, please ask for this pull request to be reopened. Thank you!
name: build

on:
  # Trigger the workflow on push to main or any pull request
  push:
    branches:
      - main
  pull_request:

jobs:
  build:
    strategy:
      max-parallel: 4
      matrix:
        platform: [ubuntu-latest, macos-latest]
        python-version: [3.8, 3.9]

    runs-on: ${{ matrix.platform }}

    steps:
      - uses: actions/checkout@v2

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - name: Conditionally install pytorch
        if: matrix.platform == 'windows-latest'
        run: pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html

      - name: Install locally
        run: |
          python -m pip install --upgrade pip
          git submodule update --init --recursive
          python -m pip install .

      - name: Check installation
        working-directory: /tmp
        run: python $GITHUB_WORKSPACE/scripts/check_installation.py

      - name: Install optional test requirements
        run: |
          python -m pip install '.[dev,docs]'
          python -m pip install iopath transformers pyarrow
          python -m pip install git+https://github.com/facebookresearch/fairscale.git@main
          python -m pip install pygit2 pgzip

      - name: Install xformers for macOS
        if: matrix.platform == 'macos-latest'
        run: |
          brew install llvm libomp
          CC=/usr/local/opt/llvm/bin/clang CXX=clang++ pip install git+https://github.com/facebookresearch/xformers.git@main

      - name: Install xformers for non-macOS
        if: matrix.platform != 'macos-latest'
        run: |
          python -m pip install --progress-bar off git+https://github.com/facebookresearch/xformers.git@main

      - name: Lint with black
        run: black --check --diff .

      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

      - name: Build doc
        run: make singlehtml
        working-directory: docs/

      - name: Run tests
        # When installing in non-editable mode, the .so files will be generated in 'site-packages/fairseq'.
        # But by default, pytest's import machinery will load the local fairseq and won't see the .so.
        # Use --import-mode=append to prefer the 'site-packages/fairseq'.
        # https://docs.pytest.org/en/7.1.x/explanation/pythonpath.html
        run: pytest --import-mode=append -vvv tests/
name: Fairseq Release

on:
  workflow_dispatch:
    inputs:
      name:
        description: 'Release Type'
        default: 'patch'
        required: true

jobs:
  get_next_version:
    runs-on: ubuntu-latest
    steps:
      - name: checkout-repo-content
        uses: actions/checkout@v2

      - name: setup-python
        uses: actions/setup-python@v2
        with:
          python-version: 3.8

      - name: get next version and tag
        id: get-next-version-and-tag
        run: |
          output=$(python3 release_utils.py --release-type ${{ github.event.inputs.name }})
          echo $output
          new_version=$(echo $output | awk '{print $1}')
          new_tag=$(echo $output | awk '{print $2}')
          echo "new version is $new_version"
          echo "new tag is $new_tag"
          echo ::set-output name=version::$new_version
          echo ::set-output name=tag::$new_tag
          echo ::set-output name=branch_name::$new_version-release
          echo "NEW_TAG=$new_tag" >> $GITHUB_ENV
          echo "NEW_BRANCH=$new_version-release" >> $GITHUB_ENV

      # update the version number in version.txt
      - name: update version
        id: update-version
        run: |
          echo "current folder = $PWD"
          echo "current branch = $(git branch --show-current)"
          output=$(python3 release_utils.py --release-type ${{ github.event.inputs.name }} --update-version)

      - name: add and commit
        uses: EndBug/add-and-commit@v9
        with:
          author_name: ${{ secrets.AUTHOR_NAME }}
          author_email: ${{ secrets.AUTHOR_EMAIL }}
          # TODO: change this to main once shipit is disabled.
          new_branch: '${{ env.NEW_BRANCH }}'
          default_author: github_actor
          message: '${{ env.NEW_TAG }} release'
          pathspec_error_handling: exitAtEnd
          # Arguments for the git pull command. Use NO-PULL to avoid the action pulling at all.
          # pull: 'NO-PULL'
          tag: '${{ env.NEW_TAG }}'

    outputs:
      new_version: ${{ steps.get-next-version-and-tag.outputs.version }}
      new_tag: ${{ steps.get-next-version-and-tag.outputs.tag }}
      branch_name: ${{ steps.get-next-version-and-tag.outputs.branch_name }}

  create_sdist:
    runs-on: ubuntu-latest
    name: Create Source Distribution
    needs: get_next_version
    steps:
      - uses: actions/checkout@v3
        with:
          ref: ${{ needs.get_next_version.outputs.branch_name }}

      - name: Install Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.8'

      - name: Upgrade pip
        run: |
          python3 -m pip install --upgrade pip

      - name: Create Source Distribution
        run: |
          python3 -m pip install setuptools wheel twine torch
          python3 setup.py sdist

      - uses: actions/upload-artifact@v2
        with:
          path: dist/*.tar.gz

  build_wheels:
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    needs: get_next_version
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]
    steps:
      - uses: actions/checkout@v3
        with:
          ref: ${{ needs.get_next_version.outputs.branch_name }}

      - name: Install Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.8'

      - name: Upgrade pip
        run: |
          python3 -m pip install --upgrade pip

      - name: Install cibuildwheel
        run: |
          python3 -m pip install cibuildwheel

      - name: Build wheels for CPython
        run: |
          python3 -m cibuildwheel --output-dir dist
        env:
          CIBW_BUILD: "cp38-*64"
          CIBW_MANYLINUX_X86_64_IMAGE: manylinux1
          CIBW_BEFORE_BUILD: git submodule update --init --recursive && pip install .
          # Install system library
          CIBW_BEFORE_BUILD_LINUX: (yum install -y libffi-devel || apt-get install -y libffi-devel || apk add --update --no-cache libffi-devel || true) && (yum install -y libc6 || apt-get install -y libc6 || apk add --update --no-cache libc6 || true)
          CIBW_ENVIRONMENT: "PIP_ONLY_BINARY=numpy"
          CIBW_SKIP: "*musllinux*"

      - uses: actions/upload-artifact@v2
        with:
          path: dist

  upload:
    name: Upload to PyPI and create release
    runs-on: ubuntu-latest
    needs: [build_wheels, create_sdist, get_next_version]
    steps:
      - uses: actions/download-artifact@v2
        with:
          name: artifact
          path: dist

      # build the PyPI package and upload it
      - name: upload
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          pip install setuptools wheel twine
          python3 -m twine upload --repository pypi dist/*

      # create the release on github
      - name: create release on github
        uses: ncipollo/release-action@v1
        with:
          tag: '${{ needs.get_next_version.outputs.new_tag }}'
# JetBrains PyCharm IDE
.idea/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# macOS dir files
.DS_Store
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Checkpoints
checkpoints
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# Generated files
/fairseq/temporal_convolution_tbc
/fairseq/modules/*_layer/*_forward.cu
/fairseq/modules/*_layer/*_backward.cu
/fairseq/version.py
# data
data-bin/
# reranking
/examples/reranking/rerank_data
# Cython-generated C++ source files
/fairseq/data/data_utils_fast.cpp
/fairseq/data/token_block_utils_fast.cpp
# VSCODE
.vscode/ftp-sync.json
.vscode/settings.json
# Experimental Folder
experimental/*
# Weights and Biases logs
wandb/
# Hydra artifacts
nohup.out
multirun
outputs
[submodule "fairseq/model_parallel/megatron"]
    path = fairseq/model_parallel/megatron
    url = https://github.com/ngoyal2707/Megatron-LM
    branch = fairseq
exclude: 'build|stubs'

default_language_version:
  python: python3

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.1.0
    hooks:
      - id: trailing-whitespace
      - id: check-ast
      - id: check-merge-conflict
      - id: no-commit-to-branch
        args: ['--branch=master']
      - id: check-added-large-files
        args: ['--maxkb=500']
      - id: end-of-file-fixer

  - repo: https://github.com/ambv/black
    rev: 22.3.0
    hooks:
      - id: black
        language_version: python3.8

  - repo: https://gitlab.com/pycqa/flake8
    rev: 3.9.2
    hooks:
      - id: flake8
        args: [
            # only error for syntax errors and undefined names
            "--select=E9,F63,F7,F82",
        ]

  - repo: https://github.com/pycqa/isort
    rev: 5.10.1
    hooks:
      - id: isort
        exclude: README.md
        additional_dependencies: [toml]
        args: ["--profile", "black"]
# Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at <conduct@pytorch.org>. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq
# Contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq)
We want to make contributing to this project as easy and transparent as
possible.
## Pull Requests
We actively welcome your pull requests.
1. Fork the repo and create your branch from `main`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
## License
By contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq),
you agree that your contributions will be licensed under the LICENSE file in
the root directory of this source tree.
## Pre-commit hooks
In order to ensure your code lints, there are pre-commit hooks configured in the repository which you can install.
After installation, they will automatically run each time you commit.
An abbreviated guide is given below; for more information, refer to [the official pre-commit documentation](https://pre-commit.com/).
### Installation
```
pip install pre-commit
pre-commit install
```
### Usage
Just commit your changes:
```
git commit -m "My informative commit message"
```
If a hook fails, you will get feedback:
```
[INFO] Initializing environment for https://github.com/PyCQA/flake8.
[INFO] Installing environment for https://github.com/pre-commit/pre-commit-hooks.
[INFO] Once installed this environment will be reused.
[INFO] This may take a few minutes...
[INFO] Installing environment for https://github.com/PyCQA/flake8.
[INFO] Once installed this environment will be reused.
[INFO] This may take a few minutes...
Trim Trailing Whitespace.................................................Failed
- hook id: trailing-whitespace
- exit code: 1
- files were modified by this hook
Fixing examples/nllb/modeling/wmt15_benchmark/eval_langs2.sh
Fix End of Files.........................................................Failed
- hook id: end-of-file-fixer
- exit code: 1
- files were modified by this hook
Fixing examples/few_shot/scripts/schedule_jobs_few_shot.py
flake8...................................................................Passed
```
Certain hooks modify your files to comply.
To include these modifications, you will need to add them (i.e. `git add ...`) and commit again.
If all is well, you should see something like:
```
Trim Trailing Whitespace.................................................Passed
Fix End of Files.........................................................Passed
flake8...................................................................Passed
[gshard-fix-ci 8698644e1] Fix lint, add pre-commit hooks
10 files changed, 148 insertions(+), 110 deletions(-)
create mode 100644 .flake8
create mode 100644 .pre-commit-config.yaml
rename examples/nllb/modeling/wmt15_benchmark/{eval_langs2.py => eval_langs2.sh} (99%)
```
MIT License
Copyright (c) Facebook, Inc. and its affiliates.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
include fairseq/version.txt
# DATA2VEC
## Paper
`Efficient Self-supervised Learning with Contextualized Target Representations for Vision, Speech and Language`
- https://arxiv.org/abs/2212.07525
## Model Architecture
The main idea of data2vec is to first build a teacher network that computes target representations from images, text, or speech. Part of the input is then masked out, and a student network repeats the process, predicting the representations produced by the teacher model. In other words, the student model has to predict representations of the "complete input data" while only receiving "incomplete input information". To keep the two models consistent, the teacher's weights are an exponential moving average of the student's, updated more quickly early in training.
![img](./images/datavec.png)
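To make the teacher-student setup concrete, the following is a minimal PyTorch sketch of the idea (module and variable names are illustrative, not the fairseq implementation): the teacher sees the complete input, the student sees the masked input, and the teacher's weights track the student's through an exponential moving average (EMA).
```
import copy
import torch

def ema_update(teacher, student, decay):
    # The teacher's weights track the student's via an exponential moving
    # average; a smaller decay early in training lets the teacher update
    # faster, as described above.
    with torch.no_grad():
        for t, s in zip(teacher.parameters(), student.parameters()):
            t.mul_(decay).add_(s, alpha=1.0 - decay)

# Any encoder producing per-position representations works for the sketch.
student = torch.nn.TransformerEncoder(
    torch.nn.TransformerEncoderLayer(d_model=64, nhead=4, batch_first=True),
    num_layers=2,
)
teacher = copy.deepcopy(student)
for p in teacher.parameters():
    p.requires_grad_(False)  # the teacher is never trained by backprop

x = torch.randn(8, 16, 64)           # (batch, positions, features)
mask = torch.rand(8, 16) < 0.5       # positions hidden from the student

with torch.no_grad():
    targets = teacher(x)             # targets come from the complete input
preds = student(x.masked_fill(mask.unsqueeze(-1), 0.0))

loss = torch.nn.functional.mse_loss(preds[mask], targets[mask])
loss.backward()
# ... optimizer step on the student, then:
ema_update(teacher, student, decay=0.999)
```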
## Algorithm
Meta AI's data2vec 2.0 release improves on the performance of the previous generation: at the same accuracy, training is up to 16x faster than other algorithms. The main pain point it addresses is that building self-supervised models otherwise requires large amounts of GPU compute to complete training.
data2vec 2.0 improves the efficiency of the original data2vec algorithm in three ways:
- Target representations are built once for a given training example and reused across its masked versions.
- As in a masked autoencoder (MAE), the encoder network of the student model does not process the blanked-out parts of the training example.
- A more efficient decoder model is used that no longer relies on a Transformer network but on a multilayer convolutional network.
![img](./images/speedup.png)
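As a rough sketch of the second point above (shapes and module choices are made up for illustration; this is not the data2vec 2.0 source), the student encoder runs only on the visible tokens, so its attention cost shrinks with the masking ratio, and a cheap convolutional decoder restores the full sequence length for prediction:
```
import torch

batch, seq_len, dim, keep = 8, 196, 64, 49   # keep 25% of the tokens

encoder = torch.nn.TransformerEncoder(
    torch.nn.TransformerEncoderLayer(d_model=dim, nhead=4, batch_first=True),
    num_layers=2,
)
# A multilayer convolutional decoder instead of a Transformer.
decoder = torch.nn.Sequential(
    torch.nn.Conv1d(dim, dim, kernel_size=3, padding=1),
    torch.nn.GELU(),
    torch.nn.Conv1d(dim, dim, kernel_size=3, padding=1),
)
mask_token = torch.zeros(dim)  # a learned parameter in practice

x = torch.randn(batch, seq_len, dim)
visible_idx = torch.rand(batch, seq_len).argsort(dim=1)[:, :keep]
gather_idx = visible_idx.unsqueeze(-1).expand(-1, -1, dim)

# Encode only the visible tokens: O(keep^2) attention instead of O(seq_len^2).
encoded = encoder(torch.gather(x, 1, gather_idx))

# Scatter the encoded tokens back into a full-length sequence of mask tokens,
# then let the convolutional decoder predict representations everywhere.
full = mask_token.expand(batch, seq_len, dim).clone()
full.scatter_(1, gather_idx, encoded)
preds = decoder(full.transpose(1, 2)).transpose(1, 2)  # (batch, seq_len, dim)
```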
## Environment Setup
1. The DCU-specific deep learning libraries required by this project can be downloaded and installed from the Guanghe (光合) developer community:
https://developer.hpccube.com/tool/
```
DTK driver: dtk23.04
python: python3.8
torch: torch1.13
torchvision: 1.13
torchaudio: 1.13.1
apex: 1.13
```
`Tip: the versions of the DTK driver, Python, torch, and the other DCU-related tools above must match exactly as listed.`
2. Build and install the fairseq library
`Version: 0.12.2`
```
cd fairseq
pip install --editable ./  # or: python setup.py build_ext --inplace
```
3. Install the remaining ordinary dependencies from requirements.txt:
```
pip install -r requirements.txt
```
## Dataset
ILSVRC 2012:
https://image-net.org/challenges/LSVRC/index.php
For extracting and organizing `imagenet 2012`, follow:
https://www.jianshu.com/p/a42b7d863825
Once organized, the data directory structure looks like this:
```
data
├── train
│   ├── n01440764
│   ├── n01806143
│   └── ...
├── val
│   ├── n04286575
│   ├── n04596742
│   └── ...
└── test
    └── images
        ├── test_x.JPEG
        ├── test_xxx.JPEG
        └── ...
```
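To sanity-check the layout before training, torchvision's `ImageFolder` can load the class directories directly (an optional check; assumes torchvision is installed and the `data/` root shown above):
```
from torchvision import datasets

# ImageFolder infers labels from the synset directory names (n01440764, ...).
train_set = datasets.ImageFolder("data/train")
val_set = datasets.ImageFolder("data/val")
print(len(train_set), "training images in", len(train_set.classes), "classes")
print(len(val_set), "validation images in", len(val_set.classes), "classes")
```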
## Training
Enter the main directory:
```
cd fairseq && mkdir logs
```
### 1. Training with mpirun
**Multi-node, multi-GPU:**
```
sbatch datavec_mpi.sh
```
**Single-node, multi-GPU** (a compute node must be requested separately first):
First modify the `call_main` function in [`utils.py`](./fairseq/distributed/utils.py):
`from`
```
#srun need auto rank: distributed_main_autorank
torch.multiprocessing.spawn(
    fn=distributed_main_autorank,
    args=(main, cfg, kwargs),
    nprocs=1,
    join=True,
)
'''
torch.multiprocessing.spawn(
    fn=distributed_main,
    args=(main, cfg, kwargs),
    nprocs=min(
        torch.cuda.device_count(),
        cfg.distributed_training.distributed_world_size,
    ),
    join=True,
)
'''
```
`to`
```
'''
#srun need auto rank: distributed_main_autorank
torch.multiprocessing.spawn(
    fn=distributed_main_autorank,
    args=(main, cfg, kwargs),
    nprocs=1,
    join=True,
)
'''
torch.multiprocessing.spawn(
    fn=distributed_main,
    args=(main, cfg, kwargs),
    nprocs=min(
        torch.cuda.device_count(),
        cfg.distributed_training.distributed_world_size,
    ),
    join=True,
)
```
Then run:
```
sh datavec.sh
```
### 2. Training with srun
**Multi-node, multi-GPU:**
```
sbatch datavec_srun.sh
```
## Inference
Following the original fairseq documentation, [`README.md`](./examples/data2vec/README.md), inference does not require multiple nodes; a single node's compute is sufficient.
## Results
![img](./images/classify.png)
![img](./images/accuracy.png)
## Application Scenarios
### Algorithm Category
`Image Classification`
### Industries
`Manufacturing, Environment, Healthcare, Meteorology`
### Framework
`pytorch`
## References
- https://github.com/facebookresearch/fairseq
<p align="center">
<img src="docs/fairseq_logo.png" width="150">
<br />
<br />
<a href="https://opensource.fb.com/support-ukraine"><img alt="Support Ukraine" src="https://img.shields.io/badge/Support-Ukraine-FFD500?style=flat&labelColor=005BBB" /></a>
<a href="https://github.com/pytorch/fairseq/blob/main/LICENSE"><img alt="MIT License" src="https://img.shields.io/badge/license-MIT-blue.svg" /></a>
<a href="https://github.com/pytorch/fairseq/releases"><img alt="Latest Release" src="https://img.shields.io/github/release/pytorch/fairseq.svg" /></a>
<a href="https://github.com/pytorch/fairseq/actions?query=workflow:build"><img alt="Build Status" src="https://github.com/pytorch/fairseq/workflows/build/badge.svg" /></a>
<a href="https://fairseq.readthedocs.io/en/latest/?badge=latest"><img alt="Documentation Status" src="https://readthedocs.org/projects/fairseq/badge/?version=latest" /></a>
<a href="https://app.circleci.com/pipelines/github/facebookresearch/fairseq/"><img alt="CicleCI Status" src="https://circleci.com/gh/facebookresearch/fairseq.svg?style=shield" /></a>
</p>
--------------------------------------------------------------------------------
Fairseq(-py) is a sequence modeling toolkit that allows researchers and
developers to train custom models for translation, summarization, language
modeling and other text generation tasks.
We provide reference implementations of various sequence modeling papers:
<details><summary>List of implemented papers</summary><p>
* **Convolutional Neural Networks (CNN)**
  + [Language Modeling with Gated Convolutional Networks (Dauphin et al., 2017)](examples/language_model/conv_lm/README.md)
  + [Convolutional Sequence to Sequence Learning (Gehring et al., 2017)](examples/conv_seq2seq/README.md)
  + [Classical Structured Prediction Losses for Sequence to Sequence Learning (Edunov et al., 2018)](https://github.com/pytorch/fairseq/tree/classic_seqlevel)
  + [Hierarchical Neural Story Generation (Fan et al., 2018)](examples/stories/README.md)
  + [wav2vec: Unsupervised Pre-training for Speech Recognition (Schneider et al., 2019)](examples/wav2vec/README.md)
* **LightConv and DynamicConv models**
  + [Pay Less Attention with Lightweight and Dynamic Convolutions (Wu et al., 2019)](examples/pay_less_attention_paper/README.md)
* **Long Short-Term Memory (LSTM) networks**
  + Effective Approaches to Attention-based Neural Machine Translation (Luong et al., 2015)
* **Transformer (self-attention) networks**
  + Attention Is All You Need (Vaswani et al., 2017)
  + [Scaling Neural Machine Translation (Ott et al., 2018)](examples/scaling_nmt/README.md)
  + [Understanding Back-Translation at Scale (Edunov et al., 2018)](examples/backtranslation/README.md)
  + [Adaptive Input Representations for Neural Language Modeling (Baevski and Auli, 2018)](examples/language_model/README.adaptive_inputs.md)
  + [Lexically constrained decoding with dynamic beam allocation (Post & Vilar, 2018)](examples/constrained_decoding/README.md)
  + [Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context (Dai et al., 2019)](examples/truncated_bptt/README.md)
  + [Adaptive Attention Span in Transformers (Sukhbaatar et al., 2019)](examples/adaptive_span/README.md)
  + [Mixture Models for Diverse Machine Translation: Tricks of the Trade (Shen et al., 2019)](examples/translation_moe/README.md)
  + [RoBERTa: A Robustly Optimized BERT Pretraining Approach (Liu et al., 2019)](examples/roberta/README.md)
  + [Facebook FAIR's WMT19 News Translation Task Submission (Ng et al., 2019)](examples/wmt19/README.md)
  + [Jointly Learning to Align and Translate with Transformer Models (Garg et al., 2019)](examples/joint_alignment_translation/README.md)
  + [Multilingual Denoising Pre-training for Neural Machine Translation (Liu et al., 2020)](examples/mbart/README.md)
  + [Neural Machine Translation with Byte-Level Subwords (Wang et al., 2020)](examples/byte_level_bpe/README.md)
  + [Unsupervised Quality Estimation for Neural Machine Translation (Fomicheva et al., 2020)](examples/unsupervised_quality_estimation/README.md)
  + [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations (Baevski et al., 2020)](examples/wav2vec/README.md)
  + [Generating Medical Reports from Patient-Doctor Conversations Using Sequence-to-Sequence Models (Enarvi et al., 2020)](examples/pointer_generator/README.md)
  + [Linformer: Self-Attention with Linear Complexity (Wang et al., 2020)](examples/linformer/README.md)
  + [Cross-lingual Retrieval for Iterative Self-Supervised Training (Tran et al., 2020)](examples/criss/README.md)
  + [Deep Transformers with Latent Depth (Li et al., 2020)](examples/latent_depth/README.md)
  + [Unsupervised Cross-lingual Representation Learning for Speech Recognition (Conneau et al., 2020)](https://arxiv.org/abs/2006.13979)
  + [Self-training and Pre-training are Complementary for Speech Recognition (Xu et al., 2020)](https://arxiv.org/abs/2010.11430)
  + [Robust wav2vec 2.0: Analyzing Domain Shift in Self-Supervised Pre-Training (Hsu et al., 2021)](https://arxiv.org/abs/2104.01027)
  + [Unsupervised Speech Recognition (Baevski et al., 2021)](https://arxiv.org/abs/2105.11084)
  + [Simple and Effective Zero-shot Cross-lingual Phoneme Recognition (Xu et al., 2021)](https://arxiv.org/abs/2109.11680)
  + [VideoCLIP: Contrastive Pre-training for Zero-shot Video-Text Understanding (Xu et al., 2021)](https://arxiv.org/pdf/2109.14084.pdf)
  + [VLM: Task-agnostic Video-Language Model Pre-training for Video Understanding (Xu et al., 2021)](https://aclanthology.org/2021.findings-acl.370.pdf)
  + [NormFormer: Improved Transformer Pretraining with Extra Normalization (Shleifer et al., 2021)](examples/normformer/README.md)
* **Non-autoregressive Transformers**
  + Non-Autoregressive Neural Machine Translation (Gu et al., 2017)
  + Deterministic Non-Autoregressive Neural Sequence Modeling by Iterative Refinement (Lee et al., 2018)
  + Insertion Transformer: Flexible Sequence Generation via Insertion Operations (Stern et al., 2019)
  + Mask-Predict: Parallel Decoding of Conditional Masked Language Models (Ghazvininejad et al., 2019)
  + [Levenshtein Transformer (Gu et al., 2019)](examples/nonautoregressive_translation/README.md)
* **Finetuning**
  + [Better Fine-Tuning by Reducing Representational Collapse (Aghajanyan et al., 2020)](examples/rxf/README.md)
</p></details>
### What's New:
* May 2023 [Released models for Scaling Speech Technology to 1,000+ Languages (Pratap, et al., 2023)](examples/mms/README.md)
* June 2022 [Released code for wav2vec-U 2.0 from Towards End-to-end Unsupervised Speech Recognition (Liu, et al., 2022)](examples/wav2vec/unsupervised/README.md)
* May 2022 [Integration with xFormers](https://github.com/facebookresearch/xformers)
* December 2021 [Released Direct speech-to-speech translation code](examples/speech_to_speech/README.md)
* October 2021 [Released VideoCLIP and VLM models](examples/MMPT/README.md)
* October 2021 [Released multilingual finetuned XLSR-53 model](examples/wav2vec/README.md)
* September 2021 [`master` branch renamed to `main`](https://github.com/github/renaming).
* July 2021 [Released DrNMT code](examples/discriminative_reranking_nmt/README.md)
* July 2021 [Released Robust wav2vec 2.0 model](examples/wav2vec/README.md)
* June 2021 [Released XLMR-XL and XLMR-XXL models](examples/xlmr/README.md)
* May 2021 [Released Unsupervised Speech Recognition code](examples/wav2vec/unsupervised/README.md)
* March 2021 [Added full parameter and optimizer state sharding + CPU offloading](examples/fully_sharded_data_parallel/README.md)
* February 2021 [Added LASER training code](examples/laser/README.md)
* December 2020: [Added Adaptive Attention Span code](examples/adaptive_span/README.md)
* December 2020: [GottBERT model and code released](examples/gottbert/README.md)
* November 2020: Adopted the [Hydra](https://github.com/facebookresearch/hydra) configuration framework
  * [see documentation explaining how to use it for new and existing projects](docs/hydra_integration.md)
* November 2020: [fairseq 0.10.0 released](https://github.com/pytorch/fairseq/releases/tag/v0.10.0)
* October 2020: [Added R3F/R4F (Better Fine-Tuning) code](examples/rxf/README.md)
* October 2020: [Deep Transformer with Latent Depth code released](examples/latent_depth/README.md)
* October 2020: [Added CRISS models and code](examples/criss/README.md)
<details><summary>Previous updates</summary><p>
* September 2020: [Added Linformer code](examples/linformer/README.md)
* September 2020: [Added pointer-generator networks](examples/pointer_generator/README.md)
* August 2020: [Added lexically constrained decoding](examples/constrained_decoding/README.md)
* August 2020: [wav2vec2 models and code released](examples/wav2vec/README.md)
* July 2020: [Unsupervised Quality Estimation code released](examples/unsupervised_quality_estimation/README.md)
* May 2020: [Follow fairseq on Twitter](https://twitter.com/fairseq)
* April 2020: [Monotonic Multihead Attention code released](examples/simultaneous_translation/README.md)
* April 2020: [Quant-Noise code released](examples/quant_noise/README.md)
* April 2020: [Initial model parallel support and 11B parameters unidirectional LM released](examples/megatron_11b/README.md)
* March 2020: [Byte-level BPE code released](examples/byte_level_bpe/README.md)
* February 2020: [mBART model and code released](examples/mbart/README.md)
* February 2020: [Added tutorial for back-translation](https://github.com/pytorch/fairseq/tree/main/examples/backtranslation#training-your-own-model-wmt18-english-german)
* December 2019: [fairseq 0.9.0 released](https://github.com/pytorch/fairseq/releases/tag/v0.9.0)
* November 2019: [VizSeq released (a visual analysis toolkit for evaluating fairseq models)](https://facebookresearch.github.io/vizseq/docs/getting_started/fairseq_example)
* November 2019: [CamemBERT model and code released](examples/camembert/README.md)
* November 2019: [BART model and code released](examples/bart/README.md)
* November 2019: [XLM-R models and code released](examples/xlmr/README.md)
* September 2019: [Nonautoregressive translation code released](examples/nonautoregressive_translation/README.md)
* August 2019: [WMT'19 models released](examples/wmt19/README.md)
* July 2019: fairseq relicensed under MIT license
* July 2019: [RoBERTa models and code released](examples/roberta/README.md)
* June 2019: [wav2vec models and code released](examples/wav2vec/README.md)
</p></details>
### Features:
* multi-GPU training on one machine or across multiple machines (data and model parallel)
* fast generation on both CPU and GPU with multiple search algorithms implemented:
  + beam search
  + Diverse Beam Search ([Vijayakumar et al., 2016](https://arxiv.org/abs/1610.02424))
  + sampling (unconstrained, top-k and top-p/nucleus)
  + [lexically constrained decoding](examples/constrained_decoding/README.md) (Post & Vilar, 2018)
* [gradient accumulation](https://fairseq.readthedocs.io/en/latest/getting_started.html#large-mini-batch-training-with-delayed-updates) enables training with large mini-batches even on a single GPU (see the sketch after this list)
* [mixed precision training](https://fairseq.readthedocs.io/en/latest/getting_started.html#training-with-half-precision-floating-point-fp16) (trains faster with less GPU memory on [NVIDIA tensor cores](https://developer.nvidia.com/tensor-cores))
* [extensible](https://fairseq.readthedocs.io/en/latest/overview.html): easily register new models, criterions, tasks, optimizers and learning rate schedulers
* [flexible configuration](docs/hydra_integration.md) based on [Hydra](https://github.com/facebookresearch/hydra) allowing a combination of code, command-line and file based configuration
* [full parameter and optimizer state sharding](examples/fully_sharded_data_parallel/README.md)
* [offloading parameters to CPU](examples/fully_sharded_data_parallel/README.md)
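The gradient accumulation feature above is exposed through fairseq's `--update-freq` option; the following is a minimal generic PyTorch sketch of the idea (illustrative only, not fairseq's trainer code):
``` python
import torch

model = torch.nn.Linear(128, 8)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
update_freq = 4  # behaves like a 4x larger batch (cf. fairseq's --update-freq)

for step in range(100):
    x, y = torch.randn(16, 128), torch.randn(16, 8)
    loss = torch.nn.functional.mse_loss(model(x), y)
    # Scale so the accumulated gradient matches one large-batch average.
    (loss / update_freq).backward()
    if (step + 1) % update_freq == 0:
        optimizer.step()
        optimizer.zero_grad()
```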
We also provide [pre-trained models for translation and language modeling](#pre-trained-models-and-examples)
with a convenient `torch.hub` interface:
``` python
en2de = torch.hub.load('pytorch/fairseq', 'transformer.wmt19.en-de.single_model')
en2de.translate('Hello world', beam=5)
# 'Hallo Welt'
```
See the PyTorch Hub tutorials for [translation](https://pytorch.org/hub/pytorch_fairseq_translation/)
and [RoBERTa](https://pytorch.org/hub/pytorch_fairseq_roberta/) for more examples.
# Requirements and Installation
* [PyTorch](http://pytorch.org/) version >= 1.10.0
* Python version >= 3.8
* For training new models, you'll also need an NVIDIA GPU and [NCCL](https://github.com/NVIDIA/nccl)
* **To install fairseq** and develop locally:
``` bash
git clone https://github.com/pytorch/fairseq
cd fairseq
pip install --editable ./
# on MacOS:
# CFLAGS="-stdlib=libc++" pip install --editable ./
# to install the latest stable release (0.10.x)
# pip install fairseq
```
* **For faster training** install NVIDIA's [apex](https://github.com/NVIDIA/apex) library:
``` bash
git clone https://github.com/NVIDIA/apex
cd apex
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" \
--global-option="--deprecated_fused_adam" --global-option="--xentropy" \
--global-option="--fast_multihead_attn" ./
```
* **For large datasets** install [PyArrow](https://arrow.apache.org/docs/python/install.html#using-pip): `pip install pyarrow`
* If you use Docker make sure to increase the shared memory size either with `--ipc=host` or `--shm-size`
as command line options to `nvidia-docker run`.
# Getting Started
The [full documentation](https://fairseq.readthedocs.io/) contains instructions
for getting started, training new models and extending fairseq with new model
types and tasks.
# Pre-trained models and examples
We provide pre-trained models and pre-processed, binarized test sets for several tasks listed below,
as well as example training and evaluation commands.
* [Translation](examples/translation/README.md): convolutional and transformer models are available
* [Language Modeling](examples/language_model/README.md): convolutional and transformer models are available
We also have more detailed READMEs to reproduce results from specific papers:
* [XLS-R: Self-supervised Cross-lingual Speech Representation Learning at Scale (Babu et al., 2021)](examples/wav2vec/xlsr/README.md)
* [Cross-lingual Retrieval for Iterative Self-Supervised Training (Tran et al., 2020)](examples/criss/README.md)
* [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations (Baevski et al., 2020)](examples/wav2vec/README.md)
* [Unsupervised Quality Estimation for Neural Machine Translation (Fomicheva et al., 2020)](examples/unsupervised_quality_estimation/README.md)
* [Training with Quantization Noise for Extreme Model Compression ({Fan*, Stock*} et al., 2020)](examples/quant_noise/README.md)
* [Neural Machine Translation with Byte-Level Subwords (Wang et al., 2020)](examples/byte_level_bpe/README.md)
* [Multilingual Denoising Pre-training for Neural Machine Translation (Liu et al., 2020)](examples/mbart/README.md)
* [Reducing Transformer Depth on Demand with Structured Dropout (Fan et al., 2019)](examples/layerdrop/README.md)
* [Jointly Learning to Align and Translate with Transformer Models (Garg et al., 2019)](examples/joint_alignment_translation/README.md)
* [Levenshtein Transformer (Gu et al., 2019)](examples/nonautoregressive_translation/README.md)
* [Facebook FAIR's WMT19 News Translation Task Submission (Ng et al., 2019)](examples/wmt19/README.md)
* [RoBERTa: A Robustly Optimized BERT Pretraining Approach (Liu et al., 2019)](examples/roberta/README.md)
* [wav2vec: Unsupervised Pre-training for Speech Recognition (Schneider et al., 2019)](examples/wav2vec/README.md)
* [Mixture Models for Diverse Machine Translation: Tricks of the Trade (Shen et al., 2019)](examples/translation_moe/README.md)
* [Pay Less Attention with Lightweight and Dynamic Convolutions (Wu et al., 2019)](examples/pay_less_attention_paper/README.md)
* [Understanding Back-Translation at Scale (Edunov et al., 2018)](examples/backtranslation/README.md)
* [Classical Structured Prediction Losses for Sequence to Sequence Learning (Edunov et al., 2018)](https://github.com/pytorch/fairseq/tree/classic_seqlevel)
* [Hierarchical Neural Story Generation (Fan et al., 2018)](examples/stories/README.md)
* [Scaling Neural Machine Translation (Ott et al., 2018)](examples/scaling_nmt/README.md)
* [Convolutional Sequence to Sequence Learning (Gehring et al., 2017)](examples/conv_seq2seq/README.md)
* [Language Modeling with Gated Convolutional Networks (Dauphin et al., 2017)](examples/language_model/README.conv.md)
# Join the fairseq community
* Twitter: https://twitter.com/fairseq
* Facebook page: https://www.facebook.com/groups/fairseq.users
* Google group: https://groups.google.com/forum/#!forum/fairseq-users
# License
fairseq(-py) is MIT-licensed.
The license applies to the pre-trained models as well.
# Citation
Please cite as:
``` bibtex
@inproceedings{ott2019fairseq,
title = {fairseq: A Fast, Extensible Toolkit for Sequence Modeling},
author = {Myle Ott and Sergey Edunov and Alexei Baevski and Angela Fan and Sam Gross and Nathan Ng and David Grangier and Michael Auli},
booktitle = {Proceedings of NAACL-HLT 2019: Demonstrations},
year = {2019},
}
```