Commit 7143f128 authored by sunxx1

Merge branch 'hepj-test' into 'main'

Update transformer code

See merge request dcutoolkit/deeplearing/dlexamples_new!47
parents a30b77fe c0f05c10
#!/usr/bin/env python3 -u
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.
#
#-------------------------------------------------------------------------
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import os
import math
import time
import ctypes
from copy import deepcopy
import torch
import sacrebleu
import dllogger as DLLogger
from fairseq import data, distributed_utils, options, utils, tokenizer
from fairseq.ddp_trainer import DDPTrainer
from fairseq.meters import StopwatchMeter
from fairseq.sequence_generator import SequenceGenerator
from fairseq.data import data_utils, load_dataset_splits
from fairseq.models import build_model
from fairseq.log_helper import setup_logger, reset_perf_meters
def main(args):
    print(args)
    setup_logger(args)

    if not torch.cuda.is_available():
        raise NotImplementedError('Training on CPU is not supported')
    torch.cuda.set_device(args.device_id)

    if args.distributed_world_size > 1:
        assert torch.distributed.is_initialized()
        torch.distributed.broadcast(torch.tensor([1], device="cuda"), 0)
        torch.cuda.synchronize()

    pValue = ctypes.cast((ctypes.c_int * 1)(), ctypes.POINTER(ctypes.c_int))
    ctypes.CDLL('libcudart.so').cudaDeviceSetLimit(ctypes.c_int(0x05), ctypes.c_int(128))
    ctypes.CDLL('libcudart.so').cudaDeviceGetLimit(pValue, ctypes.c_int(0x05))

    torch.manual_seed(args.seed)

    src_dict, tgt_dict = data_utils.load_dictionaries(args)
    add_extra_items_to_checkpoint({'src_dict': src_dict, 'tgt_dict': tgt_dict})
    datasets = load_dataset_splits(args, ['train', 'valid', 'test'], src_dict, tgt_dict)

    model = build_model(args)
    print('| num. model params: {}'.format(sum(p.numel() for p in model.parameters())))

    # Build trainer
    if torch.cuda.get_device_capability(0)[0] >= 7 and not args.amp:
        print('| NOTICE: your device may support faster training with --amp')
    trainer = DDPTrainer(args, model)
    print('| model {}, criterion {}'.format(args.arch, trainer.criterion.__class__.__name__))
    print('| training on {} GPUs'.format(args.distributed_world_size))
    print('| max tokens per GPU = {} and max sentences per GPU = {}'.format(
        args.max_tokens,
        args.max_sentences,
    ))

    epoch_itr = data.EpochBatchIterator(
        dataset=datasets[args.train_subset],
        max_tokens=args.max_tokens,
        max_sentences=args.max_sentences_valid,
        max_positions=args.max_positions,
        required_batch_size_multiple=8,
        seed=args.seed,
        num_shards=args.distributed_world_size,
        shard_id=args.distributed_rank,
    )

    # Load the latest checkpoint if one is available
    load_checkpoint(args, trainer, epoch_itr)

    # Send a dummy batch to warm the caching allocator
    dummy_batch = data_utils.get_dummy_batch(args.max_tokens, src_dict, tgt_dict)
    trainer.dummy_train_step(dummy_batch)

    # Sanity check
    if args.do_sanity_check:
        print('Performing sanity check...')
        sanity_score = score(args, trainer, datasets['test'], src_dict, tgt_dict, 'test.raw.de')
        DLLogger.log(step='SANITY_CHECK', data={'sanity_check_score': sanity_score}, verbosity=1)

    # Train until the learning rate gets too small or model reaches target score
    max_epoch = args.max_epoch or math.inf
    max_update = args.max_update or math.inf
    tgt_bleu = args.target_bleu or math.inf
    current_bleu = 0.0
    best_bleu = -1.0
    lr = trainer.get_lr()

    train_meter = StopwatchMeter()
    train_meter.start()
    valid_losses = [None]
    valid_subsets = args.valid_subset.split(',')

    run_summary = {'loss': float('inf'),
                   'val_loss': float('inf'),
                   'speed': 0,
                   'accuracy': 0}

    while lr >= args.min_lr and epoch_itr.epoch < max_epoch and trainer.get_num_updates() < max_update and current_bleu < tgt_bleu:
        DLLogger.log(step=trainer.get_num_updates()+1, data={'epoch': epoch_itr.epoch}, verbosity=0)

        # train for one epoch
        train(args, trainer, epoch_itr)
        DLLogger.log(step=trainer.get_num_updates(), data={'walltime': train_meter.sum}, verbosity=1)
        DLLogger.log(step=trainer.get_num_updates(),
                     data={'avg_epoch_loss': trainer.avg_loss_meter.avg}, verbosity=1)

        if epoch_itr.epoch % args.validate_interval == 0:
            valid_losses = validate(args, trainer, datasets, valid_subsets)
            valid_bleu = score(args, trainer, datasets[valid_subsets[0]], src_dict, tgt_dict, 'valid.raw.de')
            DLLogger.log(step=trainer.get_num_updates(),
                         data={'val_loss': valid_losses[0], 'val_bleu': valid_bleu}, verbosity=1)

        # Eval BLEU score
        if args.online_eval or (tgt_bleu is not math.inf):
            current_bleu = score(args, trainer, datasets[args.gen_subset], src_dict, tgt_dict, 'test.raw.de')
            DLLogger.log(step=trainer.get_num_updates(), data={'test_bleu': current_bleu}, verbosity=1)
            best_bleu = max(best_bleu, current_bleu)

        run_summary['val_loss'] = min(run_summary['val_loss'], valid_losses[0])
        run_summary['accuracy'] = best_bleu if best_bleu >= 0 else valid_bleu
        run_summary['loss'] = valid_losses[0]
        run_summary['speed'] = trainer.throughput_meter.u_avg

        # Only use first validation loss to update the learning rate
        lr = trainer.lr_step(epoch_itr.epoch, valid_losses[0])

        # Save checkpoint
        save_checkpoint(args, trainer, epoch_itr, valid_losses[0])

    train_meter.stop()
    run_summary['walltime'] = train_meter.sum
    DLLogger.log(step=(), data=run_summary, verbosity=0)
    print('| done training in {:.1f} seconds'.format(train_meter.sum))
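

# Note on gradient accumulation: train() below honours --update-freq.
# Gradients are buffered over `update_freq` consecutive batches
# (train_step(..., update_params=False)) and the optimizer step is taken
# only on the last batch of each group, which emulates a larger effective
# batch size on a fixed number of GPUs.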
def train(args, trainer, epoch_itr):
    """Train the model for one epoch."""

    # Initialize data iterator
    itr = epoch_itr.next_epoch_itr()

    # update parameters every N batches
    if epoch_itr.epoch <= len(args.update_freq):
        update_freq = args.update_freq[epoch_itr.epoch - 1]
    else:
        update_freq = args.update_freq[-1]

    max_update = args.max_update or math.inf
    num_batches = len(epoch_itr)

    begin = time.time()

    # reset meters
    DLLogger.flush()
    trainer.get_throughput_meter().reset()

    for i, sample in enumerate(itr):
        if i < num_batches - 1 and (i + 1) % update_freq > 0:
            # buffer updates according to --update-freq
            trainer.train_step(sample, update_params=False, last_step=(i == len(itr)-1))
            continue
        else:
            trainer.train_step(sample, update_params=True, last_step=(i == len(itr)-1))

        # ignore the first mini-batch in words-per-second calculation
        if i == 0:
            trainer.get_throughput_meter().reset()
            reset_perf_meters()

        if (i+1) % args.log_interval == 0:
            DLLogger.flush()

        if trainer.get_num_updates() >= max_update:
            break

    print('Epoch time:', time.time() - begin)

    # Print epoch stats and reset training meters
    DLLogger.log(step=trainer.get_num_updates(),
                 data={'speed': trainer.get_throughput_meter().avg}, verbosity=0)
    DLLogger.flush()
def validate(args, trainer, datasets, subsets):
    """Evaluate the model on the validation set(s) and return the losses."""
    valid_losses = []
    for subset in subsets:

        if len(subsets) > 1:
            print('Validating on \'{}\' subset'.format(subset))

        # Initialize data iterator
        itr = data.EpochBatchIterator(
            dataset=datasets[subset],
            max_tokens=args.max_tokens,
            max_sentences=args.max_sentences_valid,
            max_positions=args.max_positions,
            required_batch_size_multiple=8,
            seed=args.seed,
            num_shards=args.distributed_world_size,
            shard_id=args.distributed_rank,
        ).next_epoch_itr(shuffle=False)

        # reset validation loss meters
        DLLogger.flush()

        subset_losses = []
        for sample in itr:
            loss = trainer.valid_step(sample)
            subset_losses.append(loss)
        subset_loss = sum(subset_losses)/len(subset_losses)

        DLLogger.flush()

        valid_losses.append(subset_loss)
        print(f'Validation loss on subset {subset}: {subset_loss}')

    return valid_losses
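

# score() runs beam-search generation with SequenceGenerator over the given
# dataset, detokenizes the top hypothesis for each sentence, gathers the
# predictions from all ranks, and reports detokenized SacreBLEU against the
# raw reference file shipped with the data (e.g. test.raw.de).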
def score(args, trainer, dataset, src_dict, tgt_dict, ref_file):

    begin = time.time()

    src_dict = deepcopy(src_dict)  # This is necessary, generation of translations
    tgt_dict = deepcopy(tgt_dict)  # alters the target dictionary, messing up the rest of training

    model = trainer.get_model()

    # Initialize data iterator
    itr = data.EpochBatchIterator(
        dataset=dataset,
        max_tokens=None,
        max_sentences=max(8, min(math.ceil(1024/args.distributed_world_size), 128)),
        max_positions=args.max_positions,
        required_batch_size_multiple=8,
        num_shards=args.distributed_world_size,
        shard_id=args.distributed_rank,
    ).next_epoch_itr(shuffle=False)

    # Initialize generator
    gen_timer = StopwatchMeter()
    translator = SequenceGenerator(
        [model],
        tgt_dict.get_metadata(),
        maxlen=args.max_target_positions - 1,  # do not include EOS token
        beam_size=args.beam,
        stop_early=(not args.no_early_stop), normalize_scores=(not args.unnormalized),
        len_penalty=args.lenpen, unk_penalty=args.unkpen,
        sampling=args.sampling, sampling_topk=args.sampling_topk, minlen=args.min_len,
        use_amp=args.amp,
    )

    # Generate and compute BLEU
    predictions = []
    translations = translator.generate_batched_itr(
        itr, maxlen_a=args.max_len_a, maxlen_b=args.max_len_b,
        cuda=True, timer=gen_timer, prefix_size=args.prefix_size,
    )

    for sample_id, src_tokens, _, hypos in translations:
        # Process input and ground truth
        src_str = src_dict.string(src_tokens, args.remove_bpe)

        # Process top predictions
        for i, hypo in enumerate(hypos[:min(len(hypos), args.nbest)]):
            _, hypo_str, _ = utils.post_process_prediction(
                hypo_tokens=hypo['tokens'].int().cpu(),
                src_str=src_str,
                alignment=hypo['alignment'].int().cpu() if hypo['alignment'] is not None else None,
                align_dict=None,
                tgt_dict=tgt_dict,
                remove_bpe=args.remove_bpe
            )

            # Score only the top hypothesis
            if i == 0:
                hypo_str = tokenizer.Tokenizer.detokenize(hypo_str, 'de')
                predictions.append('{}\t{}'.format(sample_id, hypo_str))

    if args.distributed_world_size > 1:
        predictions = _all_gather_predictions(predictions)

    with open(os.path.join(args.data, ref_file), 'r') as reference:
        refs = [reference.readlines()]

    # reducing indexed predictions as strings is more memory efficient than reducing tuples
    predictions = [tuple(item.split('\t')) for item in predictions]
    predictions = [(int(item[0]), item[1]) for item in predictions]
    predictions.sort(key=lambda tup: tup[0])
    predictions = [hypo[1] + ('\n' if hypo[1][-1] != '\n' else '') for hypo in predictions]
    sacrebleu_score = sacrebleu.corpus_bleu(predictions, refs, lowercase=not args.test_cased_bleu).score

    if args.save_predictions:
        os.makedirs(os.path.join(args.save_dir, 'predictions'), exist_ok=True)
        fname = ref_file + '.pred.update_{}'.format(trainer.get_num_updates())
        save_path = os.path.join(args.save_dir, 'predictions', fname)
        with open(save_path, 'w') as f:
            f.write(''.join(predictions))

    DLLogger.log(step=trainer.get_num_updates(),
                 data={'inference tokens/s': float(args.distributed_world_size) / gen_timer.avg},
                 verbosity=0)
    DLLogger.flush()
    if gen_timer.sum != 0:
        print('| Translated {} sentences ({} tokens) in {:.1f}s ({:.2f} sentences/s, {:.2f} tokens/s)'.format(
            len(predictions),
            gen_timer.n,
            gen_timer.sum,
            len(predictions) / gen_timer.sum,
            float(args.distributed_world_size)/gen_timer.avg
        ))

    print('| Eval completed in: {:.2f}s | {}CASED BLEU {:.2f}'.format(
        time.time()-begin,
        '' if args.test_cased_bleu else 'UN',
        sacrebleu_score
    ))

    return sacrebleu_score
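

# _all_gather_predictions() exchanges the per-rank prediction lists in chunks
# below 64 kB because distributed_utils.all_gather_list uses a fixed-size
# buffer. Each round every rank broadcasts a (done, chunk) tuple; the loop
# ends once all ranks report that they have sent their last chunk.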
def _all_gather_predictions(predictions):
    ready = False
    all_ready = False
    reduced_predictions = []
    max_size = 65000
    while not all_ready:
        lst_len = len(predictions)
        size = 2000  # some extra space for python stuff
        n = 0
        while n < lst_len:
            str_len = len(predictions[n].encode('utf8')) + 8  # per string pickle overhead
            if size + str_len >= max_size:
                break
            size += str_len
            n += 1
        chunk = predictions[:n]
        predictions = predictions[n:]
        if not predictions:
            ready = True
        chunk = (ready, chunk)
        torch.cuda.synchronize()
        gathered = distributed_utils.all_gather_list(chunk, max_size=65000)
        torch.cuda.synchronize()
        reduced_predictions += [t[1] for t in gathered]
        all_ready = all([t[0] for t in gathered])

    reduced_predictions = [item for sublist in reduced_predictions for item in sublist]

    return reduced_predictions
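

# save_checkpoint() keeps its running best validation loss and any items
# registered via add_extra_items_to_checkpoint() as attributes on the
# function object itself, so they persist across epochs without globals.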
def save_checkpoint(args, trainer, epoch_itr, val_loss):
    if epoch_itr.epoch % args.save_interval != 0:
        return
    if args.no_save or not distributed_utils.is_master(args):
        return
    epoch = epoch_itr.epoch
    end_of_epoch = epoch_itr.end_of_epoch()

    checkpoint_conds = collections.OrderedDict()
    checkpoint_conds['checkpoint{}.pt'.format(epoch)] = end_of_epoch and not args.no_epoch_checkpoints
    checkpoint_conds['checkpoint_best.pt'] = (
        val_loss is not None and
        (not hasattr(save_checkpoint, 'best') or val_loss < save_checkpoint.best)
    )
    checkpoint_conds['checkpoint_last.pt'] = True  # keep this last so that it's a symlink

    prev_best = getattr(save_checkpoint, 'best', val_loss)
    if val_loss is not None:
        save_checkpoint.best = min(val_loss, prev_best)
    extra_state = {
        'best': save_checkpoint.best,
        'train_iterator': epoch_itr.state_dict(),
        'val_loss': val_loss,
    }
    extra_state.update(save_checkpoint.extra_items)

    checkpoints = [os.path.join(args.save_dir, 'checkpoints', fn)
                   for fn, cond in checkpoint_conds.items() if cond]
    if checkpoints:
        for cp in checkpoints:
            trainer.save_checkpoint(cp, extra_state)

def add_extra_items_to_checkpoint(items):
    if not hasattr(save_checkpoint, 'extra_items'):
        save_checkpoint.extra_items = {}
    save_checkpoint.extra_items.update(items)

def load_checkpoint(args, trainer, epoch_itr):
    """Load a checkpoint and replay dataloader to match."""
    os.makedirs(os.path.join(args.save_dir, 'checkpoints'), exist_ok=True)
    checkpoint_path = os.path.join(args.save_dir, 'checkpoints', args.restore_file)
    if os.path.isfile(checkpoint_path):
        extra_state = trainer.load_checkpoint(checkpoint_path)
        if extra_state is not None:
            # replay train iterator to match checkpoint
            epoch_itr.load_state_dict(extra_state['train_iterator'])
            print('| loaded checkpoint {} (epoch {} @ {} updates)'.format(
                checkpoint_path, epoch_itr.epoch, trainer.get_num_updates()))
            trainer.lr_step(epoch_itr.epoch)
            trainer.lr_step_update(trainer.get_num_updates())
            if 'best' in extra_state:
                save_checkpoint.best = extra_state['best']

if __name__ == '__main__':
    parser = options.get_training_parser()
    ARGS = options.parse_args_and_arch(parser)

    if ARGS.distributed_world_size > 1:
        distributed_utils.distributed_init(ARGS)

    main(ARGS)
# Use 2.1 for orbs
version: 2.1

# -------------------------------------------------------------------------------------
# Environments to run the jobs in
# -------------------------------------------------------------------------------------
gpu: &gpu
  environment:
    CUDA_VERSION: "11.2"
  machine:
    image: ubuntu-2004-cuda-11.2:202103-01
  resource_class: gpu.nvidia.medium.multi
# -------------------------------------------------------------------------------------
# Re-usable commands
# -------------------------------------------------------------------------------------
cache_key: &cache_key cache-key-{{ .Environment.CIRCLE_JOB }}-{{ checksum ".circleci/config.yml" }}-{{ checksum "setup.py"}}

install_dep_common: &install_dep_common
  - run:
      name: Install Common Dependencies
      command: |
        source activate fairseq
        pip install --upgrade setuptools
        pip install bitarray boto3 deepspeed editdistance fastBPE iopath ipdb ipython pyarrow pytest sacremoses sentencepiece subword-nmt hydra-core==1.0.7 omegaconf==2.0.6
        pip install --progress-bar off pytest
        pip install --progress-bar off fairscale
        pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda111 -U
        python -c 'import torch; print("Torch version:", torch.__version__)'
        python -m torch.utils.collect_env
install_dep_fused_ops: &install_dep_fused_ops
  - run:
      name: Install Megatron/Apex Dependencies
      working_directory: ~/
      command: |
        source activate fairseq
        git clone https://github.com/NVIDIA/apex
        cd apex
        git checkout e2083df5eb96643c61613b9df48dd4eea6b07690
        sed -i '101,107 s/^/#/' setup.py
        pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--deprecated_fused_adam" --global-option="--xentropy" --global-option="--fast_multihead_attn" ./
        cd ~/
        git clone --depth=1 --branch v2.4 https://github.com/NVIDIA/Megatron-LM.git
        cd Megatron-LM
        pip install -e .
install_dep_xformers: &install_dep_xformers
  - run:
      name: Install xFormers Dependencies
      working_directory: ~/
      command: |
        source activate fairseq
        git clone https://github.com/facebookresearch/xformers.git
        cd xformers
        pip install -r requirements.txt
        pip install -e .
install_dep_pt19: &install_dep_pt19
  - run:
      name: Install Pytorch Dependencies
      command: |
        source activate fairseq
        pip install --upgrade setuptools
        pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
        python -c 'import torch; print("Torch version:", torch.__version__)'

install_dep_pt18: &install_dep_pt18
  - run:
      name: Install Pytorch Dependencies
      command: |
        source activate fairseq
        pip install --upgrade setuptools
        pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
        python -c 'import torch; print("Torch version:", torch.__version__)'
install_repo: &install_repo
  - run:
      name: Install Repository
      command: |
        source activate fairseq
        pip install .
        python setup.py build_ext --inplace

run_unittests: &run_unittests
  - run:
      name: Run Unit Tests
      command: |
        source activate fairseq
        pytest tests/gpu/test_binaries_gpu.py

check_nvidia_driver: &check_nvidia_driver
  - run:
      name: Check NVIDIA Driver
      working_directory: ~/
      command: |
        pyenv versions
        nvidia-smi

create_conda_env: &create_conda_env
  - run:
      name: Install and Create Conda Environment
      command: |
        curl -o ~/miniconda.sh -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
        chmod +x ~/miniconda.sh
        ~/miniconda.sh -b -p $HOME/miniconda
        rm ~/miniconda.sh
        echo 'export PATH=$HOME/miniconda/bin:$PATH' >> $BASH_ENV
        source $BASH_ENV
        if [ ! -d ~/miniconda/envs/fairseq ]
        then
          conda create -y -n fairseq python=3.8
        fi
        source activate fairseq
        python --version
        pip install --upgrade pip
# -------------------------------------------------------------------------------------
# Jobs to run
# -------------------------------------------------------------------------------------
jobs:
  gpu_tests_pt19:
    <<: *gpu
    working_directory: ~/fairseq-py
    steps:
      - checkout
      - <<: *check_nvidia_driver
      - <<: *create_conda_env
      - restore_cache:
          key: *cache_key
      - <<: *install_dep_pt19
      - <<: *install_dep_common
      - <<: *install_dep_fused_ops
      - save_cache:
          paths:
            - ~/miniconda/
          key: *cache_key
      - <<: *install_repo
      - <<: *run_unittests

  gpu_tests_pt18:
    <<: *gpu
    working_directory: ~/fairseq-py
    steps:
      - checkout
      - <<: *check_nvidia_driver
      - <<: *create_conda_env
      - restore_cache:
          key: *cache_key
      - <<: *install_dep_pt18
      - <<: *install_dep_common
      - <<: *install_dep_fused_ops
      - save_cache:
          paths:
            - ~/miniconda/
          key: *cache_key
      - <<: *install_repo
      - <<: *run_unittests

workflows:
  version: 2
  build:
    jobs:
      - gpu_tests_pt18
      - gpu_tests_pt19
# Setting up CODEOWNERS for UST related codebase
# Documentation for open sourced models relevant to UST
examples/speech_to_text @kahne @sravyapopuri388 @jmp84
examples/speech_to_speech @an918tw @sravyapopuri388 @jmp84
examples/speech_synthesis @kahne @jmp84
examples/simultaneous_translation @kahne @jmp84
examples/speech_text_joint_to_text @yuntang @jmp84
# Speech related models relevant to UST
fairseq/models/speech_to_speech @sravyapopuri388 @jmp84
fairseq/models/speech_to_text @kahne @sravyapopuri388 @jmp84
fairseq/models/text_to_speech @kahne @jmp84
# CONFORMER IMPLEMENTATION
fairseq/modules/conformer_layer.py @sravyapopuri388 @jmp84
fairseq/modules/espnet_multihead_attention.py @sravyapopuri388 @jmp84
fairseq/modules/rotary_positional_embedding.py @sravyapopuri388 @jmp84
fairseq/modules/positional_encoding.py @sravyapopuri388 @jmp84
## 👉 [Please follow one of these issue templates](https://github.com/pytorch/fairseq/issues/new/choose) 👈
Note: to keep the backlog clean and actionable, issues may be immediately closed if they do not follow one of the above issue templates.
---
name: 🐛 Bug Report
about: Submit a bug report to help us improve
labels: 'bug, needs triage'
---
## 🐛 Bug
<!-- A clear and concise description of what the bug is. -->
### To Reproduce
Steps to reproduce the behavior (**always include the command you ran**):
1. Run cmd '....'
2. See error
<!-- If you have a code sample, error messages, stack traces, please provide it here as well -->
#### Code sample
<!-- Ideally attach a minimal code sample to reproduce the described issue.
Minimal means having the shortest code but still preserving the bug. -->
### Expected behavior
<!-- A clear and concise description of what you expected to happen. -->
### Environment
- fairseq Version (e.g., 1.0 or main):
- PyTorch Version (e.g., 1.0)
- OS (e.g., Linux):
- How you installed fairseq (`pip`, source):
- Build command you used (if compiling from source):
- Python version:
- CUDA/cuDNN version:
- GPU models and configuration:
- Any other relevant information:
### Additional context
<!-- Add any other context about the problem here. -->
---
name: 📚 Documentation/Typos
about: Report an issue related to documentation or a typo
labels: 'documentation, needs triage'
---
## 📚 Documentation
For typos and doc fixes, please go ahead and:
1. Create an issue.
2. Fix the typo.
3. Submit a PR.
Thanks!
---
name: 🚀 Feature Request
about: Submit a proposal/request for a new feature
labels: 'enhancement, help wanted, needs triage'
---
## 🚀 Feature Request
<!-- A clear and concise description of the feature proposal -->
### Motivation
<!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->
### Pitch
<!-- A clear and concise description of what you want to happen. -->
### Alternatives
<!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->
### Additional context
<!-- Add any other context or screenshots about the feature request here. -->
---
name: ❓ Questions/Help
about: If you have questions, please first search existing issues and docs
labels: 'question, needs triage'
---
## ❓ Questions and Help
### Before asking:
1. search the issues.
2. search the docs.
<!-- If you still can't find what you need: -->
#### What is your question?
#### Code
<!-- Please paste a code snippet if your question requires it! -->
#### What have you tried?
#### What's your environment?
- fairseq Version (e.g., 1.0 or main):
- PyTorch Version (e.g., 1.0)
- OS (e.g., Linux):
- How you installed fairseq (`pip`, source):
- Build command you used (if compiling from source):
- Python version:
- CUDA/cuDNN version:
- GPU models and configuration:
- Any other relevant information:
# Before submitting
- [ ] Was this discussed/approved via a GitHub issue? (not needed for typos or doc improvements)
- [ ] Did you read the [contributor guideline](https://github.com/pytorch/fairseq/blob/main/CONTRIBUTING.md)?
- [ ] Did you make sure to update the docs?
- [ ] Did you write any new necessary tests?
## What does this PR do?
Fixes # (issue).
## PR review
Anyone in the community is free to review the PR once the tests have passed.
If we didn't discuss your PR in GitHub issues, there's a high chance it will not be merged.
## Did you have fun?
Make sure you had fun coding 🙃
# Configuration for probot-stale - https://github.com/probot/stale
# Mostly copied from github.com/facebook/react/blob/master/.github/stale.yml
# Number of days of inactivity before an issue becomes stale
daysUntilStale: 90
# Number of days of inactivity before a stale issue is closed
daysUntilClose: 7
# Issues with these labels will never be considered stale
exemptLabels:
- bug
# Label to use when marking an issue as stale
staleLabel: stale
issues:
  # Comment to post when marking an issue as stale.
  markComment: >
    This issue has been automatically marked as stale.
    **If this issue is still affecting you, please leave any comment** (for example, "bump"), and we'll keep it open.
    We are sorry that we haven't been able to prioritize it yet. If you have any new additional information, please include it with your comment!
  # Comment to post when closing a stale issue.
  closeComment: >
    Closing this issue after a prolonged period of inactivity. If this issue is still present in the latest release, please create a new issue with up-to-date information. Thank you!
pulls:
  # Comment to post when marking a pull request as stale.
  markComment: >
    This pull request has been automatically marked as stale.
    **If this pull request is still relevant, please leave any comment** (for example, "bump"), and we'll keep it open.
    We are sorry that we haven't been able to prioritize reviewing it yet. Your contribution is very much appreciated.
  # Comment to post when closing a stale pull request.
  closeComment: >
    Closing this pull request after a prolonged period of inactivity. If this issue is still present in the latest release, please ask for this pull request to be reopened. Thank you!
name: build

on:
  # Trigger the workflow on push to main or any pull request
  push:
    branches:
      - main
  pull_request:

jobs:
  build:
    strategy:
      max-parallel: 4
      matrix:
        platform: [ubuntu-latest, macos-latest]
        python-version: [3.8, 3.9]

    runs-on: ${{ matrix.platform }}

    steps:
      - uses: actions/checkout@v2

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - name: Conditionally install pytorch
        if: matrix.platform == 'windows-latest'
        run: pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html

      - name: Install locally
        run: |
          python -m pip install --upgrade pip
          git submodule update --init --recursive
          python setup.py build_ext --inplace
          python -m pip install --editable .

      - name: Install optional test requirements
        run: |
          python -m pip install iopath transformers pyarrow
          python -m pip install git+https://github.com/facebookresearch/fairscale.git@main
          python -m pip install --progress-bar off git+https://github.com/facebookresearch/xformers.git@main
          python -m pip install pytest

      - name: Lint with flake8
        run: |
          pip install flake8
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --extend-exclude fairseq/model_parallel/megatron
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --extend-exclude fairseq/model_parallel/megatron

      - name: Run tests
        run: |
          python setup.py test

      - name: Lint with black
        run: |
          pip install black==22.3.0
          black --check . --extend-exclude 'examples|fairseq\/model_parallel\/megatron'
name: build_wheels

on:
  push:
    branches:
      - v[0-9]+.[0-9]+.[x0-9]+
    tags:
      - v*

jobs:
  build_wheels:
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]

    steps:
      - uses: actions/checkout@v2

      - name: Install Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.8'

      - name: Upgrade pip
        run: |
          python3 -m pip install --upgrade pip

      - name: Install cibuildwheel
        run: |
          python3 -m pip install cibuildwheel

      - name: Build wheels for CPython
        run: |
          python3 -m cibuildwheel --output-dir dist
        env:
          CIBW_BUILD: "cp36-*64 cp37-*64 cp38-*64"
          CIBW_MANYLINUX_X86_64_IMAGE: manylinux1
          CIBW_BEFORE_BUILD: git submodule update --init --recursive && pip install .
          # Install system library
          CIBW_BEFORE_BUILD_LINUX: yum install -y libffi-devel || apt-get install -y libffi-devel || apk add --update --no-cache libffi-devel || true
          CIBW_ENVIRONMENT: "PIP_ONLY_BINARY=numpy"
          CIBW_SKIP: "*musllinux*"

      - uses: actions/upload-artifact@v2
        with:
          name: wheels
          path: ./dist/*.whl
# JetBrains PyCharm IDE
.idea/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# macOS dir files
.DS_Store
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Checkpoints
checkpoints
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
# Generated files
/fairseq/temporal_convolution_tbc
/fairseq/modules/*_layer/*_forward.cu
/fairseq/modules/*_layer/*_backward.cu
/fairseq/version.py
# data
data-bin/
# reranking
/examples/reranking/rerank_data
# Cython-generated C++ source files
/fairseq/data/data_utils_fast.cpp
/fairseq/data/token_block_utils_fast.cpp
# VSCODE
.vscode/ftp-sync.json
.vscode/settings.json
# Experimental Folder
experimental/*
# Weights and Biases logs
wandb/
# Hydra artifacts
nohup.out
multirun
outputs
[submodule "fairseq/model_parallel/megatron"]
path = fairseq/model_parallel/megatron
url = https://github.com/ngoyal2707/Megatron-LM
branch = fairseq
[settings]
known_third_party = _cffi_backend,agg_results,aml,bitarray,boto3,botocore,dump_hubert_feature,dynamicconv_cuda,editdistance,faiss,fasttext,feature_utils,ffmpeg,g2p_en,h5py,hydra,hypothesis,indicnlp,inflect,iopath,joblib,kaldi_io,kenlm,libfb,librosa,lightconv_cuda,matplotlib,misc,mmpt,mmpt_cli,model,nltk,npy_append_array,numpy,omegaconf,pandas,pathbuilder,preprocessing,progressbar,pythainlp,random_sequence_shuffler,regex,sacrebleu,sacremoses,scipy,sentencepiece,setuptools,six,sklearn,soundfile,sweep,sweep_wmt_en2de_transformer_big_common,tabulate,torch,torchaudio,tqdm,unidecode,utils,videoreader,wav2vec_cluster_faiss,wget,yaml
exclude: 'build|stubs'

default_language_version:
  python: python3

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.1.0
    hooks:
      - id: trailing-whitespace
      - id: check-ast
      - id: check-merge-conflict
      - id: no-commit-to-branch
        args: ['--branch=master']
      - id: check-added-large-files
        args: ['--maxkb=500']
      - id: end-of-file-fixer

  - repo: https://github.com/ambv/black
    rev: 22.3.0
    hooks:
      - id: black
        language_version: python3.8

  - repo: https://gitlab.com/pycqa/flake8
    rev: 3.9.2
    hooks:
      - id: flake8
        args: [
          # only error for syntax errors and undefined names
          "--select=E9,F63,F7,F82",
        ]

  - repo: https://github.com/pycqa/isort
    rev: 5.10.1
    hooks:
      - id: isort
        exclude: README.md
        additional_dependencies: [toml]
        args: ["--profile", "black"]
# Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at <conduct@pytorch.org>. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq
# Contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq)
We want to make contributing to this project as easy and transparent as
possible.
## Pull Requests
We actively welcome your pull requests.
1. Fork the repo and create your branch from `main`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").
## Contributor License Agreement ("CLA")
In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.
Complete your CLA here: <https://code.facebook.com/cla>
## Issues
We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.
## License
By contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq),
you agree that your contributions will be licensed under the LICENSE file in
the root directory of this source tree.
## Pre-commit hooks
In order to ensure your code lints, there are pre-commit hooks configured in the repository which you can install.
After installation, they will automatically run each time you commit.
An abbreviated guide is given below; for more information, refer to [the official pre-commit documentation](https://pre-commit.com/).
### Installation
```
pip install pre-commit
pre-commit install
```
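If you want to check files that were committed before the hooks were installed, you can also run every configured hook manually over the whole tree:
```
pre-commit run --all-files
```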
### Usage
Just commit your changes:
```
git commit -m "My informative commit message"
```
If there was a failure, you will get feedback:
```
[INFO] Initializing environment for https://github.com/PyCQA/flake8.
[INFO] Installing environment for https://github.com/pre-commit/pre-commit-hooks.
[INFO] Once installed this environment will be reused.
[INFO] This may take a few minutes...
[INFO] Installing environment for https://github.com/PyCQA/flake8.
[INFO] Once installed this environment will be reused.
[INFO] This may take a few minutes...
Trim Trailing Whitespace.................................................Failed
- hook id: trailing-whitespace
- exit code: 1
- files were modified by this hook
Fixing examples/nllb/modeling/wmt15_benchmark/eval_langs2.sh
Fix End of Files.........................................................Failed
- hook id: end-of-file-fixer
- exit code: 1
- files were modified by this hook
Fixing examples/few_shot/scripts/schedule_jobs_few_shot.py
flake8...................................................................Passed
```
Certain hooks modify your files to comply.
To include these modifications, you will need to add them (i.e. `git add ...`) and commit again.
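For example, to pick up the fixes from the run above:
```
git add examples/nllb/modeling/wmt15_benchmark/eval_langs2.sh examples/few_shot/scripts/schedule_jobs_few_shot.py
git commit -m "My informative commit message"
```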
If all is well, you should see something like:
```
Trim Trailing Whitespace.................................................Passed
Fix End of Files.........................................................Passed
flake8...................................................................Passed
[gshard-fix-ci 8698644e1] Fix lint, add pre-commit hooks
10 files changed, 148 insertions(+), 110 deletions(-)
create mode 100644 .flake8
create mode 100644 .pre-commit-config.yaml
rename examples/nllb/modeling/wmt15_benchmark/{eval_langs2.py => eval_langs2.sh} (99%)
```
MIT License
Copyright (c) Facebook, Inc. and its affiliates.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.