Commit 7143f128 authored by sunxx1

Merge branch 'hepj-test' into 'main'

Update transformer code

See merge request dcutoolkit/deeplearing/dlexamples_new!47
parents a30b77fe c0f05c10
#!/usr/bin/env python3 -u
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
#
#-------------------------------------------------------------------------
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import os
import math
import time
import ctypes
from copy import deepcopy
import torch
import sacrebleu
import dllogger as DLLogger
from fairseq import data, distributed_utils, options, utils, tokenizer
from fairseq.ddp_trainer import DDPTrainer
from fairseq.meters import StopwatchMeter
from fairseq.sequence_generator import SequenceGenerator
from fairseq.data import data_utils, load_dataset_splits
from fairseq.models import build_model
from fairseq.log_helper import setup_logger, reset_perf_meters
def main(args):
    print(args)
    setup_logger(args)

    if not torch.cuda.is_available():
        raise NotImplementedError('Training on CPU is not supported')
    torch.cuda.set_device(args.device_id)

    if args.distributed_world_size > 1:
        assert torch.distributed.is_initialized()
        torch.distributed.broadcast(torch.tensor([1], device="cuda"), 0)
        torch.cuda.synchronize()

    pValue = ctypes.cast((ctypes.c_int * 1)(), ctypes.POINTER(ctypes.c_int))
    ctypes.CDLL('libcudart.so').cudaDeviceSetLimit(ctypes.c_int(0x05), ctypes.c_int(128))
    ctypes.CDLL('libcudart.so').cudaDeviceGetLimit(pValue, ctypes.c_int(0x05))

    torch.manual_seed(args.seed)

    src_dict, tgt_dict = data_utils.load_dictionaries(args)
    add_extra_items_to_checkpoint({'src_dict': src_dict, 'tgt_dict': tgt_dict})
    datasets = load_dataset_splits(args, ['train', 'valid', 'test'], src_dict, tgt_dict)

    model = build_model(args)
    print('| num. model params: {}'.format(sum(p.numel() for p in model.parameters())))

    # Build trainer
    if torch.cuda.get_device_capability(0)[0] >= 7 and not args.amp:
        print('| NOTICE: your device may support faster training with --amp')
    trainer = DDPTrainer(args, model)
    print('| model {}, criterion {}'.format(args.arch, trainer.criterion.__class__.__name__))
    print('| training on {} GPUs'.format(args.distributed_world_size))
    print('| max tokens per GPU = {} and max sentences per GPU = {}'.format(
        args.max_tokens,
        args.max_sentences,
    ))

    epoch_itr = data.EpochBatchIterator(
        dataset=datasets[args.train_subset],
        max_tokens=args.max_tokens,
        max_sentences=args.max_sentences_valid,
        max_positions=args.max_positions,
        required_batch_size_multiple=8,
        seed=args.seed,
        num_shards=args.distributed_world_size,
        shard_id=args.distributed_rank,
    )

    # Load the latest checkpoint if one is available
    load_checkpoint(args, trainer, epoch_itr)

    # Send a dummy batch to warm the caching allocator
    dummy_batch = data_utils.get_dummy_batch(args.max_tokens, src_dict, tgt_dict)
    trainer.dummy_train_step(dummy_batch)

    # Sanity check
    if args.do_sanity_check:
        print('Performing sanity check...')
        sanity_score = score(args, trainer, datasets['test'], src_dict, tgt_dict, 'test.raw.de')
        DLLogger.log(step='SANITY_CHECK', data={'sanity_check_score': sanity_score}, verbosity=1)

    # Train until the learning rate gets too small or model reaches target score
    max_epoch = args.max_epoch or math.inf
    max_update = args.max_update or math.inf
    tgt_bleu = args.target_bleu or math.inf
    current_bleu = 0.0
    best_bleu = -1.0
    lr = trainer.get_lr()

    train_meter = StopwatchMeter()
    train_meter.start()
    valid_losses = [None]
    valid_subsets = args.valid_subset.split(',')

    run_summary = {'loss': float('inf'),
                   'val_loss': float('inf'),
                   'speed': 0,
                   'accuracy': 0}

    while lr >= args.min_lr and epoch_itr.epoch < max_epoch and trainer.get_num_updates() < max_update and current_bleu < tgt_bleu:
        DLLogger.log(step=trainer.get_num_updates()+1, data={'epoch': epoch_itr.epoch}, verbosity=0)

        # train for one epoch
        train(args, trainer, epoch_itr)
        DLLogger.log(step=trainer.get_num_updates(), data={'walltime': train_meter.sum}, verbosity=1)
        DLLogger.log(step=trainer.get_num_updates(),
                     data={'avg_epoch_loss': trainer.avg_loss_meter.avg}, verbosity=1)

        if epoch_itr.epoch % args.validate_interval == 0:
            valid_losses = validate(args, trainer, datasets, valid_subsets)
            valid_bleu = score(args, trainer, datasets[valid_subsets[0]], src_dict, tgt_dict, 'valid.raw.de')
            DLLogger.log(step=trainer.get_num_updates(),
                         data={'val_loss': valid_losses[0], 'val_bleu': valid_bleu}, verbosity=1)

        # Eval BLEU score
        if args.online_eval or (tgt_bleu is not math.inf):
            current_bleu = score(args, trainer, datasets[args.gen_subset], src_dict, tgt_dict, 'test.raw.de')
            DLLogger.log(step=trainer.get_num_updates(), data={'test_bleu': current_bleu}, verbosity=1)
            best_bleu = max(best_bleu, current_bleu)

        run_summary['val_loss'] = min(run_summary['val_loss'], valid_losses[0])
        run_summary['accuracy'] = best_bleu if best_bleu >= 0 else valid_bleu
        run_summary['loss'] = valid_losses[0]
        run_summary['speed'] = trainer.throughput_meter.u_avg

        # Only use first validation loss to update the learning rate
        lr = trainer.lr_step(epoch_itr.epoch, valid_losses[0])

        # Save checkpoint
        save_checkpoint(args, trainer, epoch_itr, valid_losses[0])

    train_meter.stop()
    run_summary['walltime'] = train_meter.sum
    DLLogger.log(step=(), data=run_summary, verbosity=0)
    print('| done training in {:.1f} seconds'.format(train_meter.sum))
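

# Note on gradient accumulation: train() below honours --update-freq.
# Gradients are buffered over `update_freq` consecutive batches
# (train_step(..., update_params=False)) and the optimizer step is taken
# only on the last batch of each group, which emulates a larger effective
# batch size on a fixed number of GPUs.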
def train(args, trainer, epoch_itr):
    """Train the model for one epoch."""

    # Initialize data iterator
    itr = epoch_itr.next_epoch_itr()

    # update parameters every N batches
    if epoch_itr.epoch <= len(args.update_freq):
        update_freq = args.update_freq[epoch_itr.epoch - 1]
    else:
        update_freq = args.update_freq[-1]

    max_update = args.max_update or math.inf
    num_batches = len(epoch_itr)

    begin = time.time()

    # reset meters
    DLLogger.flush()
    trainer.get_throughput_meter().reset()

    for i, sample in enumerate(itr):
        if i < num_batches - 1 and (i + 1) % update_freq > 0:
            # buffer updates according to --update-freq
            trainer.train_step(sample, update_params=False, last_step=(i == len(itr)-1))
            continue
        else:
            trainer.train_step(sample, update_params=True, last_step=(i == len(itr)-1))

        # ignore the first mini-batch in words-per-second calculation
        if i == 0:
            trainer.get_throughput_meter().reset()
            reset_perf_meters()

        if (i+1) % args.log_interval == 0:
            DLLogger.flush()

        if trainer.get_num_updates() >= max_update:
            break

    print('Epoch time:', time.time() - begin)

    # Print epoch stats and reset training meters
    DLLogger.log(step=trainer.get_num_updates(),
                 data={'speed': trainer.get_throughput_meter().avg}, verbosity=0)
    DLLogger.flush()
def validate(args, trainer, datasets, subsets):
    """Evaluate the model on the validation set(s) and return the losses."""
    valid_losses = []
    for subset in subsets:

        if len(subsets) > 1:
            print('Validating on \'{}\' subset'.format(subset))

        # Initialize data iterator
        itr = data.EpochBatchIterator(
            dataset=datasets[subset],
            max_tokens=args.max_tokens,
            max_sentences=args.max_sentences_valid,
            max_positions=args.max_positions,
            required_batch_size_multiple=8,
            seed=args.seed,
            num_shards=args.distributed_world_size,
            shard_id=args.distributed_rank,
        ).next_epoch_itr(shuffle=False)

        # reset validation loss meters
        DLLogger.flush()

        subset_losses = []
        for sample in itr:
            loss = trainer.valid_step(sample)
            subset_losses.append(loss)
        subset_loss = sum(subset_losses)/len(subset_losses)

        DLLogger.flush()

        valid_losses.append(subset_loss)
        print(f'Validation loss on subset {subset}: {subset_loss}')

    return valid_losses
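

# score() runs beam-search generation with SequenceGenerator over the given
# dataset, detokenizes the top hypothesis for each sentence, gathers the
# predictions from all ranks, and reports detokenized SacreBLEU against the
# raw reference file shipped with the data (e.g. test.raw.de).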
def score(args, trainer, dataset, src_dict, tgt_dict, ref_file):

    begin = time.time()

    src_dict = deepcopy(src_dict)  # This is necessary, generation of translations
    tgt_dict = deepcopy(tgt_dict)  # alters the target dictionary, messing up the rest of training

    model = trainer.get_model()

    # Initialize data iterator
    itr = data.EpochBatchIterator(
        dataset=dataset,
        max_tokens=None,
        max_sentences=max(8, min(math.ceil(1024/args.distributed_world_size), 128)),
        max_positions=args.max_positions,
        required_batch_size_multiple=8,
        num_shards=args.distributed_world_size,
        shard_id=args.distributed_rank,
    ).next_epoch_itr(shuffle=False)

    # Initialize generator
    gen_timer = StopwatchMeter()
    translator = SequenceGenerator(
        [model],
        tgt_dict.get_metadata(),
        maxlen=args.max_target_positions - 1,  # do not include EOS token
        beam_size=args.beam,
        stop_early=(not args.no_early_stop), normalize_scores=(not args.unnormalized),
        len_penalty=args.lenpen, unk_penalty=args.unkpen,
        sampling=args.sampling, sampling_topk=args.sampling_topk, minlen=args.min_len,
        use_amp=args.amp,
    )

    # Generate and compute BLEU
    predictions = []
    translations = translator.generate_batched_itr(
        itr, maxlen_a=args.max_len_a, maxlen_b=args.max_len_b,
        cuda=True, timer=gen_timer, prefix_size=args.prefix_size,
    )

    for sample_id, src_tokens, _, hypos in translations:
        # Process input and ground truth
        src_str = src_dict.string(src_tokens, args.remove_bpe)

        # Process top predictions
        for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
            _, hypo_str, _ = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                align_dict=None,
                tgt_dict=tgt_dict,
                remove_bpe=args.remove_bpe
            )

            # Score only the top hypothesis
            if i == 0:
                hypo_str = tokenizer.Tokenizer.detokenize(hypo_str, 'de')
                predictions.append('{}\t{}'.format(sample_id, hypo_str))

    if args.distributed_world_size > 1:
        predictions = _all_gather_predictions(predictions)

    with open(os.path.join(args.data, ref_file), 'r') as reference:
        refs = [reference.readlines()]

    # reducing indexed predictions as strings is more memory efficient than reducing tuples
    predictions = [tuple(item.split('\t')) for item in predictions]
    predictions = [(int(item[0]), item[1]) for item in predictions]
    predictions.sort(key=lambda tup: tup[0])
    predictions = [hypo[1] + ('\n' if hypo[1][-1] != '\n' else '') for hypo in predictions]
    sacrebleu_score = sacrebleu.corpus_bleu(predictions, refs, lowercase=not args.test_cased_bleu).score

    if args.save_predictions:
        os.makedirs(os.path.join(args.save_dir, 'predictions'), exist_ok=True)
        fname = ref_file + '.pred.update_{}'.format(trainer.get_num_updates())
        save_path = os.path.join(args.save_dir, 'predictions', fname)
        with open(save_path, 'w') as f:
            f.write(''.join(predictions))

    DLLogger.log(step=trainer.get_num_updates(),
                 data={'inference tokens/s': float(args.distributed_world_size) / gen_timer.avg},
                 verbosity=0)
    DLLogger.flush()
    if gen_timer.sum != 0:
        print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)'.format(
            len(predictions),
            gen_timer.n,
            gen_timer.sum,
            len(predictions) / gen_timer.sum,
            float(args.distributed_world_size)/gen_timer.avg
        ))

    print('| Eval completed in: {:.2f}s | {}CASED BLEU {:.2f}'.format(
        time.time()-begin,
        '' if args.test_cased_bleu else 'UN',
        sacrebleu_score
    ))

    return sacrebleu_score
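

# _all_gather_predictions() exchanges the per-rank prediction lists in chunks
# below 64 kB because distributed_utils.all_gather_list uses a fixed-size
# buffer. Each round every rank broadcasts a (done, chunk) tuple; the loop
# ends once all ranks report that they have sent their last chunk.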
def _all_gather_predictions(predictions):
    ready = False
    all_ready = False
    reduced_predictions = []
    max_size = 65000
    while not all_ready:
        lst_len = len(predictions)
        size = 2000  # some extra space for python stuff
        n = 0
        while n < lst_len:
            str_len = len(predictions[n].encode('utf8')) + 8  # per string pickle overhead
            if size + str_len >= max_size:
                break
            size += str_len
            n += 1
        chunk = predictions[:n]
        predictions = predictions[n:]
        if not predictions:
            ready = True
        chunk = (ready, chunk)
        torch.cuda.synchronize()
        gathered = distributed_utils.all_gather_list(chunk, max_size=65000)
        torch.cuda.synchronize()
        reduced_predictions += [t[1] for t in gathered]
        all_ready = all([t[0] for t in gathered])

    reduced_predictions = [item for sublist in reduced_predictions for item in sublist]

    return reduced_predictions
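

# save_checkpoint() keeps its running best validation loss and any items
# registered via add_extra_items_to_checkpoint() as attributes on the
# function object itself, so they persist across epochs without globals.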
def save_checkpoint(args, trainer, epoch_itr, val_loss):
    if epoch_itr.epoch % args.save_interval != 0:
        return
    if args.no_save or not distributed_utils.is_master(args):
        return
    epoch = epoch_itr.epoch
    end_of_epoch = epoch_itr.end_of_epoch()

    checkpoint_conds = collections.OrderedDict()
    checkpoint_conds['checkpoint{}.pt'.format(epoch)] = end_of_epoch and not args.no_epoch_checkpoints
    checkpoint_conds['checkpoint_best.pt'] = (
        val_loss is not None and
        (not hasattr(save_checkpoint, 'best') or val_loss < save_checkpoint.best)
    )
    checkpoint_conds['checkpoint_last.pt'] = True  # keep this last so that it's a symlink

    prev_best = getattr(save_checkpoint, 'best', val_loss)
    if val_loss is not None:
        save_checkpoint.best = min(val_loss, prev_best)
    extra_state = {
        'best': save_checkpoint.best,
        'train_iterator': epoch_itr.state_dict(),
        'val_loss': val_loss,
    }
    extra_state.update(save_checkpoint.extra_items)

    checkpoints = [os.path.join(args.save_dir, 'checkpoints', fn)
                   for fn, cond in checkpoint_conds.items() if cond]
    if checkpoints:
        for cp in checkpoints:
            trainer.save_checkpoint(cp, extra_state)

def add_extra_items_to_checkpoint(items):
    if not hasattr(save_checkpoint, 'extra_items'):
        save_checkpoint.extra_items = {}
    save_checkpoint.extra_items.update(items)

def load_checkpoint(args, trainer, epoch_itr):
    """Load a checkpoint and replay dataloader to match."""
    os.makedirs(os.path.join(args.save_dir, 'checkpoints'), exist_ok=True)
    checkpoint_path = os.path.join(args.save_dir, 'checkpoints', args.restore_file)
    if os.path.isfile(checkpoint_path):
        extra_state = trainer.load_checkpoint(checkpoint_path)
        if extra_state is not None:
            # replay train iterator to match checkpoint
            epoch_itr.load_state_dict(extra_state['train_iterator'])
            print('| loaded checkpoint {} (epoch {} @ {} updates)'.format(
                checkpoint_path, epoch_itr.epoch, trainer.get_num_updates()))
            trainer.lr_step(epoch_itr.epoch)
            trainer.lr_step_update(trainer.get_num_updates())
            if 'best' in extra_state:
                save_checkpoint.best = extra_state['best']

if __name__ == '__main__':
    parser = options.get_training_parser()
    ARGS = options.parse_args_and_arch(parser)

    if ARGS.distributed_world_size > 1:
        distributed_utils.distributed_init(ARGS)

    main(ARGS)
# Use 2.1 for orbs
version: 2.1

# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
gpu: &gpu
  environment:
    CUDA_VERSION: "11.2"
  machine:
    image: ubuntu-2004-cuda-11.2:202103-01
  resource_class: gpu.nvidia.medium.multi
# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
cache_key: &cache_key cache-key-{{ .Environment.CIRCLE_JOB }}-{{ checksum ".circleci/config.yml" }}-{{ checksum "setup.py"}}

install_dep_common: &install_dep_common
  - run:
      name: Install Common Dependencies
      command: |
        source activate fairseq
        pip install --upgrade setuptools
        pip install bitarray boto3 deepspeed editdistance fastBPE iopath ipdb ipython pyarrow pytest sacremoses sentencepiece subword-nmt hydra-core==1.0.7 omegaconf==2.0.6
        pip install --progress-bar off pytest
        pip install --progress-bar off fairscale
        pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda111 -U
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -m torch.utils.collect_env
install_dep_fused_ops: &install_dep_fused_ops
  - run:
      name: Install Megatron/Apex Dependencies
      working_directory: ~/
      command: |
        source activate fairseq
        git clone https://github.com/NVIDIA/apex
        cd apex
        git checkout e2083df5eb96643c61613b9df48dd4eea6b07690
        sed -i '101,107 s/^/#/' setup.py
        pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--deprecated_fused_adam" --global-option="--xentropy" --global-option="--fast_multihead_attn" ./
        cd ~/
        git clone --depth=1 --branch v2.4 https://github.com/NVIDIA/Megatron-LM.git
        cd Megatron-LM
        pip install -e .
install_dep_xformers: &install_dep_xformers
  - run:
      name: Install xFormers Dependencies
      working_directory: ~/
      command: |
        source activate fairseq
        git clone https://github.com/facebookresearch/xformers.git
        cd xformers
        pip install -r requirements.txt
        pip install -e .
install_dep_pt19: &install_dep_pt19
  - run:
      name: Install Pytorch Dependencies
      command: |
        source activate fairseq
        pip install --upgrade setuptools
        pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
        python -c 'import torch; print("Torch version:", torch.__version__)'

install_dep_pt18: &install_dep_pt18
  - run:
      name: Install Pytorch Dependencies
      command: |
        source activate fairseq
        pip install --upgrade setuptools
        pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
        python -c 'import torch; print("Torch version:", torch.__version__)'
install_repo: &install_repo
  - run:
      name: Install Repository
      command: |
        source activate fairseq
        pip install .
        python setup.py build_ext --inplace

run_unittests: &run_unittests
  - run:
      name: Run Unit Tests
      command: |
        source activate fairseq
        pytest tests/gpu/test_binaries_gpu.py

check_nvidia_driver: &check_nvidia_driver
  - run:
      name: Check NVIDIA Driver
      working_directory: ~/
      command: |
        pyenv versions
        nvidia-smi

create_conda_env: &create_conda_env
  - run:
      name: Install and Create Conda Environment
      command: |
        curl -o ~/miniconda.sh -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
        chmod +x ~/miniconda.sh
        ~/miniconda.sh -b -p $HOME/miniconda
        rm ~/miniconda.sh
        echo 'export PATH=$HOME/miniconda/bin:$PATH' >> $BASH_ENV
        source $BASH_ENV
        if [ ! -d ~/miniconda/envs/fairseq ]
        then
          conda create -y -n fairseq python=3.8
        fi
        source activate fairseq
        python --version
        pip install --upgrade pip
# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------
jobs:
  gpu_tests_pt19:
    <<: *gpu
    working_directory: ~/fairseq-py
    steps:
      - checkout
      - <<: *check_nvidia_driver
      - <<: *create_conda_env
      - restore_cache:
          key: *cache_key
      - <<: *install_dep_pt19
      - <<: *install_dep_common
      - <<: *install_dep_fused_ops
      - save_cache:
          paths:
            - ~/miniconda/
          key: *cache_key
      - <<: *install_repo
      - <<: *run_unittests

  gpu_tests_pt18:
    <<: *gpu
    working_directory: ~/fairseq-py
    steps:
      - checkout
      - <<: *check_nvidia_driver
      - <<: *create_conda_env
      - restore_cache:
          key: *cache_key
      - <<: *install_dep_pt18
      - <<: *install_dep_common
      - <<: *install_dep_fused_ops
      - save_cache:
          paths:
            - ~/miniconda/
          key: *cache_key
      - <<: *install_repo
      - <<: *run_unittests

workflows:
  version: 2
  build:
    jobs:
      - gpu_tests_pt18
      - gpu_tests_pt19
# Setting up CODEOWNERS for UST related codebase
# Documentation for open sourced models relevant to UST
examples/speech_to_text @kahne @sravyapopuri388 @jmp84
examples/speech_to_speech @an918tw @sravyapopuri388 @jmp84
examples/speech_synthesis @kahne @jmp84
examples/simultaneous_translation @kahne @jmp84
examples/speech_text_joint_to_text @yuntang @jmp84
# Speech related models relevant to UST
fairseq/models/speech_to_speech @sravyapopuri388 @jmp84
fairseq/models/speech_to_text @kahne @sravyapopuri388 @jmp84
fairseq/models/text_to_speech @kahne @jmp84
# CONFORMER IMPLEMENTATION
fairseq/modules/conformer_layer.py @sravyapopuri388 @jmp84
fairseq/modules/espnet_multihead_attention.py @sravyapopuri388 @jmp84
fairseq/modules/rotary_positional_embedding.py @sravyapopuri388 @jmp84
fairseq/modules/positional_encoding.py @sravyapopuri388 @jmp84
## 👉 [Please follow one of these issue templates](https://github.com/pytorch/fairseq/issues/new/choose) 👈
Note: to keep the backlog clean and actionable, issues may be immediately closed if they do not follow one of the above issue templates.
---
name: 🐛 Bug Report
about: Submit a bug report to help us improve
labels: 'bug, needs triage'
---
## 🐛 Bug
<!-- A clear and concise description of what the bug is. -->
### To Reproduce
Steps to reproduce the behavior (**always include the command you ran**):
1. Run cmd '....'
2. See error
<!-- If you have a code sample, error messages, stack traces, please provide it here as well -->
#### Code sample
<!-- Ideally attach a minimal code sample to reproduce the described issue.
Minimal means having the shortest code but still preserving the bug. -->
### Expected behavior
<!-- A clear and concise description of what you expected to happen. -->
### Environment
- fairseq Version (e.g., 1.0 or main):
- PyTorch Version (e.g., 1.0)
- OS (e.g., Linux):
- How you installed fairseq (`pip`, source):
- Build command you used (if compiling from source):
- Python version:
- CUDA/cuDNN version:
- GPU models and configuration:
- Any other relevant information:
### Additional context
<!-- Add any other context about the problem here. -->
---
name: 📚 Documentation/Typos
about: Report an issue related to documentation or a typo
labels: 'documentation, needs triage'
---
## 📚 Documentation
For typos and doc fixes, please go ahead and:
1. Create an issue.
2. Fix the typo.
3. Submit a PR.
Thanks!
---
name: 🚀 Feature Request
about: Submit a proposal/request for a new feature
labels: 'enhancement, help wanted, needs triage'
---
## 🚀 Feature Request
<!-- A clear and concise description of the feature proposal -->
### Motivation
<!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
### Pitch
<!-- A clear and concise description of what you want to happen. -->
### Alternatives
<!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->
### Additional context
<!-- Add any other context or screenshots about the feature request here. -->
---
name: ❓ Questions/Help
about: If you have questions, please first search existing issues and docs
labels: 'question, needs triage'
---
## ❓ Questions and Help
### Before asking:
1. search the issues.
2. search the docs.
<!-- If you still can't find what you need: -->
#### What is your question?
#### Code
<!-- Please paste a code snippet if your question requires it! -->
#### What have you tried?
#### What's your environment?
- fairseq Version (e.g., 1.0 or main):
- PyTorch Version (e.g., 1.0)
- OS (e.g., Linux):
- How you installed fairseq (`pip`, source):
- Build command you used (if compiling from source):
- Python version:
- CUDA/cuDNN version:
- GPU models and configuration:
- Any other relevant information:
# Before submitting
- [ ] Was this discussed/approved via a GitHub issue? (not needed for typos or doc improvements)
- [ ] Did you read the [contributor guideline](https://github.com/pytorch/fairseq/blob/main/CONTRIBUTING.md)?
- [ ] Did you make sure to update the docs?
- [ ] Did you write any new necessary tests?
## What does this PR do?
Fixes # (issue).
## PR review
Anyone in the community is free to review the PR once the tests have passed.
If we didn't discuss your PR in GitHub issues, there's a high chance it will not be merged.
## Did you have fun?
Make sure you had fun coding 🙃
# Configuration for probot-stale - https://github.com/probot/stale
# Mostly copied from github.com/facebook/react/blob/master/.github/stale.yml
# Number of days of inactivity before an issue becomes stale
daysUntilStale: 90
# Number of days of inactivity before a stale issue is closed
daysUntilClose: 7
# Issues with these labels will never be considered stale
exemptLabels:
- bug
# Label to use when marking an issue as stale
staleLabel: stale
issues:
  # Comment to post when marking an issue as stale.
  markComment: >
    This issue has been automatically marked as stale.
    **If this issue is still affecting you, please leave any comment** (for example, "bump"), and we'll keep it open.
    We are sorry that we haven't been able to prioritize it yet. If you have any new additional information, please include it with your comment!
  # Comment to post when closing a stale issue.
  closeComment: >
    Closing this issue after a prolonged period of inactivity. If this issue is still present in the latest release, please create a new issue with up-to-date information. Thank you!
pulls:
  # Comment to post when marking a pull request as stale.
  markComment: >
    This pull request has been automatically marked as stale.
    **If this pull request is still relevant, please leave any comment** (for example, "bump"), and we'll keep it open.
    We are sorry that we haven't been able to prioritize reviewing it yet. Your contribution is very much appreciated.
  # Comment to post when closing a stale pull request.
  closeComment: >
    Closing this pull request after a prolonged period of inactivity. If this issue is still present in the latest release, please ask for this pull request to be reopened. Thank you!
name: build

on:
  # Trigger the workflow on push to main or any pull request
  push:
    branches:
      - main
  pull_request:

jobs:
  build:
    strategy:
      max-parallel: 4
      matrix:
        platform: [ubuntu-latest, macos-latest]
        python-version: [3.8, 3.9]

    runs-on: ${{ matrix.platform }}

    steps:
      - uses: actions/checkout@v2

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - name: Conditionally install pytorch
        if: matrix.platform == 'windows-latest'
        run: pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html

      - name: Install locally
        run: |
          python -m pip install --upgrade pip
          git submodule update --init --recursive
          python setup.py build_ext --inplace
          python -m pip install --editable .

      - name: Install optional test requirements
        run: |
          python -m pip install iopath transformers pyarrow
          python -m pip install git+https://github.com/facebookresearch/fairscale.git@main
          python -m pip install --progress-bar off git+https://github.com/facebookresearch/xformers.git@main
          python -m pip install pytest

      - name: Lint with flake8
        run: |
          pip install flake8
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --extend-exclude fairseq/model_parallel/megatron
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --extend-exclude fairseq/model_parallel/megatron

      - name: Run tests
        run: |
          python setup.py test

      - name: Lint with black
        run: |
          pip install black==22.3.0
          black --check . --extend-exclude 'examples|fairseq\/model_parallel\/megatron'
name: build_wheels

on:
  push:
    branches:
      - v[0-9]+.[0-9]+.[x0-9]+
    tags:
      - v*

jobs:
  build_wheels:
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]

    steps:
      - uses: actions/checkout@v2

      - name: Install Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.8'

      - name: Upgrade pip
        run: |
          python3 -m pip install --upgrade pip

      - name: Install cibuildwheel
        run: |
          python3 -m pip install cibuildwheel

      - name: Build wheels for CPython
        run: |
          python3 -m cibuildwheel --output-dir dist
        env:
          CIBW_BUILD: "cp36-*64 cp37-*64 cp38-*64"
          CIBW_MANYLINUX_X86_64_IMAGE: manylinux1
          CIBW_BEFORE_BUILD: git submodule update --init --recursive && pip install .
          # Install system library
          CIBW_BEFORE_BUILD_LINUX: yum install -y libffi-devel || apt-get install -y libffi-devel || apk add --update --no-cache libffi-devel || true
          CIBW_ENVIRONMENT: "PIP_ONLY_BINARY=numpy"
          CIBW_SKIP: "*musllinux*"

      - uses: actions/upload-artifact@v2
        with:
          name: wheels
          path: ./dist/*.whl
# JetBrains PyCharm IDE
.idea/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# macOS dir files
.DS_Store
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Checkpoints
checkpoints
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# Generated files
/fairseq/temporal_convolution_tbc
/fairseq/modules/*_layer/*_forward.cu
/fairseq/modules/*_layer/*_backward.cu
/fairseq/version.py
# data
data-bin/
# reranking
/examples/reranking/rerank_data
# Cython-generated C++ source files
/fairseq/data/data_utils_fast.cpp
/fairseq/data/token_block_utils_fast.cpp
# VSCODE
.vscode/ftp-sync.json
.vscode/settings.json
# Experimental Folder
experimental/*
# Weights and Biases logs
wandb/
# Hydra artifacts
nohup.out
multirun
outputs
[submodule "fairseq/model_parallel/megatron"]
path = fairseq/model_parallel/megatron
url = https://github.com/ngoyal2707/Megatron-LM
branch = fairseq
[settings]
known_third_party = _cffi_backend,agg_results,aml,bitarray,boto3,botocore,dump_hubert_feature,dynamicconv_cuda,editdistance,faiss,fasttext,feature_utils,ffmpeg,g2p_en,h5py,hydra,hypothesis,indicnlp,inflect,iopath,joblib,kaldi_io,kenlm,libfb,librosa,lightconv_cuda,matplotlib,misc,mmpt,mmpt_cli,model,nltk,npy_append_array,numpy,omegaconf,pandas,pathbuilder,preprocessing,progressbar,pythainlp,random_sequence_shuffler,regex,sacrebleu,sacremoses,scipy,sentencepiece,setuptools,six,sklearn,soundfile,sweep,sweep_wmt_en2de_transformer_big_common,tabulate,torch,torchaudio,tqdm,unidecode,utils,videoreader,wav2vec_cluster_faiss,wget,yaml
exclude: 'build|stubs'

default_language_version:
  python: python3

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.1.0
    hooks:
      - id: trailing-whitespace
      - id: check-ast
      - id: check-merge-conflict
      - id: no-commit-to-branch
        args: ['--branch=master']
      - id: check-added-large-files
        args: ['--maxkb=500']
      - id: end-of-file-fixer

  - repo: https://github.com/ambv/black
    rev: 22.3.0
    hooks:
      - id: black
        language_version: python3.8

  - repo: https://gitlab.com/pycqa/flake8
    rev: 3.9.2
    hooks:
      - id: flake8
        args: [
          # only error for syntax errors and undefined names
          "--select=E9,F63,F7,F82",
        ]

  - repo: https://github.com/pycqa/isort
    rev: 5.10.1
    hooks:
      - id: isort
        exclude: README.md
        additional_dependencies: [toml]
        args: ["--profile", "black"]
# Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at <conduct@pytorch.org>. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq
# Contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq)
We want to make contributing to this project as easy and transparent as
possible.
## Pull Requests
We actively welcome your pull requests.
1. Fork the repo and create your branch from `main`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
## License
By contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq),
you agree that your contributions will be licensed under the LICENSE file in
the root directory of this source tree.
## Pre-commit hooks
In order to ensure your code lints, there are pre-commit hooks configured in the repository which you can install.
After installation, they will automatically run each time you commit.
An abbreviated guide is given below; for more information, refer to [the official pre-commit documentation](https://pre-commit.com/).
### Installation
```
pip install pre-commit
pre-commit install
```
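If you want to check files that were committed before the hooks were installed, you can also run every configured hook manually over the whole tree:
```
pre-commit run --all-files
```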
### Usage
Just commit your changes:
```
git commit -m "My informative commit message"
```
If there was a failure, you will get feedback:
```
[INFO] Initializing environment for https://github.com/PyCQA/flake8.
[INFO] Installing environment for https://github.com/pre-commit/pre-commit-hooks.
[INFO] Once installed this environment will be reused.
[INFO] This may take a few minutes...
[INFO] Installing environment for https://github.com/PyCQA/flake8.
[INFO] Once installed this environment will be reused.
[INFO] This may take a few minutes...
Trim Trailing Whitespace.................................................Failed
- hook id: trailing-whitespace
- exit code: 1
- files were modified by this hook
Fixing examples/nllb/modeling/wmt15_benchmark/eval_langs2.sh
Fix End of Files.........................................................Failed
- hook id: end-of-file-fixer
- exit code: 1
- files were modified by this hook
Fixing examples/few_shot/scripts/schedule_jobs_few_shot.py
flake8...................................................................Passed
```
Certain hooks modify your files to comply.
To include these modifications, you will need to add them (i.e. `git add ...`) and commit again.
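For example, to pick up the fixes from the run above:
```
git add examples/nllb/modeling/wmt15_benchmark/eval_langs2.sh examples/few_shot/scripts/schedule_jobs_few_shot.py
git commit -m "My informative commit message"
```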
If all is well, you should see something like:
```
Trim Trailing Whitespace.................................................Passed
Fix End of Files.........................................................Passed
flake8...................................................................Passed
[gshard-fix-ci 8698644e1] Fix lint, add pre-commit hooks
10 files changed, 148 insertions(+), 110 deletions(-)
create mode 100644 .flake8
create mode 100644 .pre-commit-config.yaml
rename examples/nllb/modeling/wmt15_benchmark/{eval_langs2.py => eval_langs2.sh} (99%)
```
MIT License
Copyright (c) Facebook, Inc. and its affiliates.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.