Commit 18d27e00 authored by wangwei990215's avatar wangwei990215
Browse files

initial commit

parent 541f4c7a
# @package _group_
# Decoder settings: ReLU activation, 12 layers, 1024-dim embeddings,
# 4096-dim feed-forward, 16 attention heads.

# Activation and dropout
activation_fn: 'relu'
dropout: 0.1
attention_dropout: 0.0
activation_dropout: 0.0
relu_dropout: 0.0

# Decoder dimensions and normalization
decoder_embed_dim: 1024
decoder_output_dim: 1024
decoder_input_dim: 1024
decoder_ffn_embed_dim: 4096
decoder_layers: 12
decoder_attention_heads: 16
decoder_normalize_before: true
no_decoder_final_norm: false

# Adaptive softmax (no cutoff configured)
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4

# Token embeddings
no_token_positional_embeddings: false
share_decoder_input_output_embed: false

# Character embeddings (switched off); the filter spec is kept as a string
character_embeddings: false
character_filters: '[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]'
character_embedding_dim: 4
char_embedder_highway_layers: 2

# Adaptive input (switched off)
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false

# Remaining decoder switches
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false

# Quantization noise
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
# @package _group_
# Decoder settings: ReLU activation, 12 layers, 512-dim embeddings,
# 4096-dim feed-forward, 16 attention heads, no final decoder norm.

# Activation and dropout
activation_fn: 'relu'
dropout: 0.1
attention_dropout: 0.1
activation_dropout: 0.0
relu_dropout: 0.0

# Decoder dimensions and normalization
decoder_embed_dim: 512
decoder_output_dim: 512
decoder_input_dim: 512
decoder_ffn_embed_dim: 4096
decoder_layers: 12
decoder_attention_heads: 16
decoder_normalize_before: true
no_decoder_final_norm: true

# Adaptive softmax (no cutoff configured)
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4

# Token embeddings
no_token_positional_embeddings: false
share_decoder_input_output_embed: false

# Character embeddings (switched off); the filter spec is kept as a string
character_embeddings: false
character_filters: '[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]'
character_embedding_dim: 4
char_embedder_highway_layers: 2

# Adaptive input (switched off)
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false

# Remaining decoder switches
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false

# Quantization noise
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
# @package _group_
# Decoder settings: GELU activation, 12 layers, 768-dim embeddings,
# 3072-dim feed-forward, 12 attention heads.

# Activation and dropout
activation_fn: 'gelu'
dropout: 0.1
attention_dropout: 0.1
activation_dropout: 0.0
relu_dropout: 0.0

# Decoder dimensions and normalization
decoder_embed_dim: 768
decoder_output_dim: 768
decoder_input_dim: 768
decoder_ffn_embed_dim: 3072
decoder_layers: 12
decoder_attention_heads: 12
decoder_normalize_before: true
no_decoder_final_norm: false

# Adaptive softmax (no cutoff configured)
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4

# Token embeddings
no_token_positional_embeddings: false
share_decoder_input_output_embed: false

# Character embeddings (switched off); the filter spec is kept as a string
character_embeddings: false
character_filters: '[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]'
character_embedding_dim: 4
char_embedder_highway_layers: 2

# Adaptive input (switched off)
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false

# Remaining decoder switches
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false

# Quantization noise
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
# @package _group_
# Decoder settings: GELU activation, 48 layers, 1600-dim embeddings,
# 6400-dim feed-forward, 25 attention heads.

# Activation and dropout
activation_fn: 'gelu'
dropout: 0.1
attention_dropout: 0.1
activation_dropout: 0.0
relu_dropout: 0.0

# Decoder dimensions and normalization
decoder_embed_dim: 1600
decoder_output_dim: 1600
decoder_input_dim: 1600
decoder_ffn_embed_dim: 6400
decoder_layers: 48
decoder_attention_heads: 25
decoder_normalize_before: true
no_decoder_final_norm: false

# Adaptive softmax (no cutoff configured)
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4

# Token embeddings
no_token_positional_embeddings: false
share_decoder_input_output_embed: false

# Character embeddings (switched off); the filter spec is kept as a string
character_embeddings: false
character_filters: '[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]'
character_embedding_dim: 4
char_embedder_highway_layers: 2

# Adaptive input (switched off)
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false

# Remaining decoder switches
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false

# Quantization noise
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
# @package _group_
# Decoder settings: GELU activation, 36 layers, 1280-dim embeddings,
# 5120-dim feed-forward, 20 attention heads.

# Activation and dropout
activation_fn: 'gelu'
dropout: 0.1
attention_dropout: 0.1
activation_dropout: 0.0
relu_dropout: 0.0

# Decoder dimensions and normalization
decoder_embed_dim: 1280
decoder_output_dim: 1280
decoder_input_dim: 1280
decoder_ffn_embed_dim: 5120
decoder_layers: 36
decoder_attention_heads: 20
decoder_normalize_before: true
no_decoder_final_norm: false

# Adaptive softmax (no cutoff configured)
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4

# Token embeddings
no_token_positional_embeddings: false
share_decoder_input_output_embed: false

# Character embeddings (switched off); the filter spec is kept as a string
character_embeddings: false
character_filters: '[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]'
character_embedding_dim: 4
char_embedder_highway_layers: 2

# Adaptive input (switched off)
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false

# Remaining decoder switches
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false

# Quantization noise
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
# @package _group_
# Decoder settings: GELU activation, 24 layers, 1024-dim embeddings,
# 4096-dim feed-forward, 16 attention heads.

# Activation and dropout
activation_fn: 'gelu'
dropout: 0.1
attention_dropout: 0.1
activation_dropout: 0.0
relu_dropout: 0.0

# Decoder dimensions and normalization
decoder_embed_dim: 1024
decoder_output_dim: 1024
decoder_input_dim: 1024
decoder_ffn_embed_dim: 4096
decoder_layers: 24
decoder_attention_heads: 16
decoder_normalize_before: true
no_decoder_final_norm: false

# Adaptive softmax (no cutoff configured)
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4

# Token embeddings
no_token_positional_embeddings: false
share_decoder_input_output_embed: false

# Character embeddings (switched off); the filter spec is kept as a string
character_embeddings: false
character_filters: '[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]'
character_embedding_dim: 4
char_embedder_highway_layers: 2

# Adaptive input (switched off)
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false

# Remaining decoder switches
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false

# Quantization noise
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
# @package _group_
# Decoder settings: ReLU activation, 16 layers, 1024-dim embeddings,
# 4096-dim feed-forward, 8 attention heads, with adaptive input and
# adaptive softmax both using the cutoffs 20000,60000.

# Activation and dropout
activation_fn: 'relu'
dropout: 0.3
attention_dropout: 0.1
activation_dropout: 0.1
relu_dropout: 0.1

# Decoder dimensions and normalization
decoder_embed_dim: 1024
decoder_output_dim: 1024
decoder_input_dim: 1024
decoder_ffn_embed_dim: 4096
decoder_layers: 16
decoder_attention_heads: 8
decoder_normalize_before: true
no_decoder_final_norm: true

# Adaptive softmax; the cutoff list is passed as a comma-separated string
adaptive_softmax_cutoff: '20000,60000'
adaptive_softmax_dropout: 0.2
adaptive_softmax_factor: 4

# Token embeddings
no_token_positional_embeddings: false
share_decoder_input_output_embed: false

# Character embeddings (switched off); the filter spec is kept as a string
character_embeddings: false
character_filters: '[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]'
character_embedding_dim: 4
char_embedder_highway_layers: 2

# Adaptive input (switched on, weights and projections tied)
adaptive_input: true
adaptive_input_factor: 4
adaptive_input_cutoff: '20000,60000'
tie_adaptive_weights: true
tie_adaptive_proj: true

# Remaining decoder switches
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false

# Quantization noise
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
# @package _group_
# Adam optimizer settings.

# The beta pair is stored as a string, not as a YAML sequence.
adam_betas: '(0.9, 0.999)'
adam_eps: 1.0e-8
weight_decay: 0
use_old_adam: false
# @package _group_
# Momentum-based optimizer settings (momentum 0.99, no weight decay).

momentum: 0.99
weight_decay: 0.0
# @package _group_
# Evaluation parameter set. Each top-level key below is a section whose
# options are nested under it; the ${params.dataset.*} interpolations rely
# on `dataset` being a nested mapping.

# Logging, seeding and mixed-precision options.
common:
  no_progress_bar: false
  log_interval: 100
  log_format: null
  tensorboard_logdir: null
  seed: 1
  cpu: false
  fp16: false
  memory_efficient_fp16: false
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  min_loss_scale: 1.0e-4
  threshold_loss_scale: null
  user_dir: null
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  checkpoint_suffix: ""
  quantization_config_path: null

# Distributed / multi-device options.
distributed_training:
  distributed_rank: 0
  distributed_backend: "nccl"
  distributed_init_method: null
  distributed_port: -1
  device_id: 0
  local_rank: 0
  distributed_no_spawn: false
  ddp_backend: "c10d"
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: false
  fast_stat_sync: false
  broadcast_buffers: false
  distributed_wrapper: "DDP"
  slowmo_momentum: null
  slowmo_algorithm: "LocalSGD"
  localsgd_frequency: 3

# Data loading and batching options.
dataset:
  num_workers: 1
  skip_invalid_size_inputs_valid_test: false
  max_tokens: null
  # NOTE(review): if this file is mounted under `params`, this interpolation
  # resolves to this very key — confirm it is overridden by the caller.
  batch_size: ${params.dataset.batch_size}
  required_batch_size_multiple: 8
  dataset_impl: null
  data_buffer_size: 10
  train_subset: "train"
  valid_subset: "valid"
  validate_interval: 1
  fixed_validation_seed: null
  disable_validation: false
  curriculum: 0
  gen_subset: "test"
  num_shards: 1
  shard_id: 0
  # Validation limits mirror the main dataset limits via interpolation.
  max_tokens_valid: ${params.dataset.max_tokens}
  batch_size_valid: ${params.dataset.batch_size}

# Optimization schedule options.
optimization:
  max_epoch: 0
  max_update: 0
  clip_norm: 25.0
  sentence_avg: false
  update_freq: [1]
  lr: [0.25]
  min_lr: -1.0
  use_bmuf: false

# Checkpoint saving and restoring options.
checkpoint:
  save_dir: "checkpoints"
  restore_file: "checkpoint_last.pt"
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  optimizer_overrides: "{}"
  save_interval: 1
  save_interval_updates: 0
  keep_interval_updates: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: false
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: "loss"
  maximize_best_checkpoint_metric: false
  patience: -1

# Options shared by evaluation entry points.
common_eval:
  path: null
  remove_bpe: null
  quiet: false
  model_overrides: "{}"
  results_path: null

# Language-model evaluation options.
eval_lm:
  output_word_probs: false
  output_word_stats: false
  context_window: 0

# BMUF options (optimization.use_bmuf is false in this file).
bmuf:
  block_lr: 1
  block_momentum: 0.875
  global_sync_iter: 50
  warmup_iterations: 500
  use_nbm: false
  average_sync: false
# @package _group_
# Training parameter set. Each top-level key below is a section whose
# options are nested under it; the ${params.dataset.*} interpolations rely
# on `dataset` being a nested mapping.

# Logging, seeding and mixed-precision options.
common:
  no_progress_bar: false
  log_interval: 100
  log_format: null
  tensorboard_logdir: null
  seed: 1
  cpu: false
  fp16: false
  memory_efficient_fp16: false
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  min_loss_scale: 1.0e-4
  threshold_loss_scale: null
  user_dir: null
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  checkpoint_suffix: ""
  quantization_config_path: null

# Distributed / multi-device options.
distributed_training:
  distributed_rank: 0
  distributed_backend: "nccl"
  distributed_init_method: null
  distributed_port: -1
  device_id: 0
  local_rank: 0
  distributed_no_spawn: false
  ddp_backend: "c10d"
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: false
  fast_stat_sync: false
  broadcast_buffers: false
  distributed_wrapper: "DDP"
  slowmo_momentum: null
  slowmo_algorithm: "LocalSGD"
  localsgd_frequency: 3

# Data loading and batching options.
dataset:
  num_workers: 1
  skip_invalid_size_inputs_valid_test: false
  max_tokens: null
  # NOTE(review): if this file is mounted under `params`, this interpolation
  # resolves to this very key — confirm it is overridden by the caller.
  batch_size: ${params.dataset.batch_size}
  required_batch_size_multiple: 8
  dataset_impl: null
  data_buffer_size: 10
  train_subset: "train"
  valid_subset: "valid"
  validate_interval: 1
  fixed_validation_seed: null
  disable_validation: false
  curriculum: 0
  gen_subset: "test"
  num_shards: 1
  shard_id: 0
  # Validation limits mirror the main dataset limits via interpolation.
  max_tokens_valid: ${params.dataset.max_tokens}
  batch_size_valid: ${params.dataset.batch_size}

# Optimization schedule options.
optimization:
  max_epoch: 0
  max_update: 0
  clip_norm: 25.0
  sentence_avg: false
  update_freq: [1]
  lr: [0.25]
  min_lr: -1.0
  use_bmuf: false

# Checkpoint saving and restoring options.
checkpoint:
  save_dir: "checkpoints"
  restore_file: "checkpoint_last.pt"
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  optimizer_overrides: "{}"
  save_interval: 1
  save_interval_updates: 0
  keep_interval_updates: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: false
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: "loss"
  maximize_best_checkpoint_metric: false
  patience: -1

# BMUF options (optimization.use_bmuf is false in this file).
bmuf:
  block_lr: 1
  block_momentum: 0.875
  global_sync_iter: 50
  warmup_iterations: 500
  use_nbm: false
  average_sync: false
# @package _group_
# Task-level data settings.

# `???` marks a value with no default here; it has to be supplied elsewhere.
data: ???

# Sample construction
sample_break_mode: 'none'
tokens_per_sample: 1024
output_dictionary_size: -1

# Target selection (all switched off in this file)
self_target: false
future_target: false
past_target: false

add_bos_token: false
max_target_positions: null
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = python -msphinx
SPHINXPROJ = fairseq
SOURCEDIR = .
BUILDDIR = _build

# Put it first so that "make" without argument is like "make help".
# Recipe lines must begin with a tab character.
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "Makefile" is declared phony so the catch-all rule below never tries to
# rebuild this file itself.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
/* Keep <kbd> content inside responsive-table cells on a single line. */
.wy-table-responsive table td kbd {
    white-space: nowrap;
}

/* Let regular cell text wrap; !important outranks competing declarations. */
.wy-table-responsive table td {
    white-space: normal !important;
}

/* Show overflowing table content rather than clipping or scrolling it. */
.wy-table-responsive {
    overflow: visible !important;
}
.. _Command-line Tools:

Command-line Tools
==================

Fairseq provides several command-line tools for training and evaluating models:

- :ref:`fairseq-preprocess`: Data pre-processing: build vocabularies and binarize training data
- :ref:`fairseq-train`: Train a new model on one or multiple GPUs
- :ref:`fairseq-generate`: Translate pre-processed data with a trained model
- :ref:`fairseq-interactive`: Translate raw text with a trained model
- :ref:`fairseq-score`: BLEU scoring of generated translations against reference translations
- :ref:`fairseq-eval-lm`: Language model evaluation

.. _fairseq-preprocess:

fairseq-preprocess
~~~~~~~~~~~~~~~~~~

.. automodule:: fairseq_cli.preprocess

.. argparse::
    :module: fairseq.options
    :func: get_preprocessing_parser
    :prog: fairseq-preprocess

.. _fairseq-train:

fairseq-train
~~~~~~~~~~~~~

.. automodule:: fairseq_cli.train

.. argparse::
    :module: fairseq.options
    :func: get_training_parser
    :prog: fairseq-train

.. _fairseq-generate:

fairseq-generate
~~~~~~~~~~~~~~~~

.. automodule:: fairseq_cli.generate

.. argparse::
    :module: fairseq.options
    :func: get_generation_parser
    :prog: fairseq-generate

.. _fairseq-interactive:

fairseq-interactive
~~~~~~~~~~~~~~~~~~~

.. automodule:: fairseq_cli.interactive

.. argparse::
    :module: fairseq.options
    :func: get_interactive_generation_parser
    :prog: fairseq-interactive

.. _fairseq-score:

fairseq-score
~~~~~~~~~~~~~

.. automodule:: fairseq_cli.score

.. argparse::
    :module: fairseq_cli.score
    :func: get_parser
    :prog: fairseq-score

.. _fairseq-eval-lm:

fairseq-eval-lm
~~~~~~~~~~~~~~~

.. automodule:: fairseq_cli.eval_lm

.. argparse::
    :module: fairseq.options
    :func: get_eval_lm_parser
    :prog: fairseq-eval-lm
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# fairseq documentation build configuration file, created by
# sphinx-quickstart on Fri Aug 17 21:45:30 2018.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.

import os
import sys


# source code directory, relative to this file, for sphinx-autobuild
sys.path.insert(0, os.path.abspath(".."))

source_suffix = [".rst"]

# -- General configuration ------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.intersphinx",
    "sphinx.ext.viewcode",
    "sphinx.ext.napoleon",
    "sphinxarg.ext",
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The master toctree document.
master_doc = "index"

# General information about the project.
project = "fairseq"
copyright = "2019, Facebook AI Research (FAIR)"
author = "Facebook AI Research (FAIR)"

github_doc_root = "https://github.com/pytorch/fairseq/tree/master/docs/"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = "0.10.2"
# The full version, including alpha/beta/rc tags.
release = "0.10.2"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# An explicit language code is used instead of None: Sphinx 5+ warns on
# `language = None` (an error when warnings are fatal) and falls back to
# "en" anyway, so "en" keeps the generated content in English.
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
highlight_language = "python"

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False


# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

# NOTE(review): newer Sphinx releases offer `html_css_files` for extra
# stylesheets; the html_context form is kept unchanged here — confirm it
# still takes effect with the theme version in use.
html_context = {
    "css_files": [
        "_static/theme_overrides.css",  # override wide tables in RTD theme
    ],
}

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# This is required for the alabaster theme
# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
# html_sidebars = {
#    '**': [
#        'about.html',
#        'navigation.html',
#        'relations.html',  # needs 'show_related': True theme option to display
#        'searchbox.html',
#        'donate.html',
#    ]
# }


# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {
    # The numpy docs moved off docs.scipy.org; point at the current https
    # location instead of relying on a redirect from the old http URL.
    "numpy": ("https://numpy.org/doc/stable/", None),
    "python": ("https://docs.python.org/", None),
    "torch": ("https://pytorch.org/docs/master/", None),
}
.. role:: hidden
    :class: hidden-section

.. _Criterions:

Criterions
==========

Criterions compute the loss function given the model and batch, roughly::

  loss = criterion(model, batch)

.. automodule:: fairseq.criterions
    :members:

.. autoclass:: fairseq.criterions.FairseqCriterion
    :members:
    :undoc-members:

.. autoclass:: fairseq.criterions.adaptive_loss.AdaptiveLoss
    :members:
    :undoc-members:

.. autoclass:: fairseq.criterions.composite_loss.CompositeLoss
    :members:
    :undoc-members:

.. autoclass:: fairseq.criterions.cross_entropy.CrossEntropyCriterion
    :members:
    :undoc-members:

.. autoclass:: fairseq.criterions.label_smoothed_cross_entropy.LabelSmoothedCrossEntropyCriterion
    :members:
    :undoc-members:
.. role:: hidden
    :class: hidden-section

.. module:: fairseq.data

Data Loading and Utilities
==========================

.. _datasets:

Datasets
--------

**Datasets** define the data format and provide helpers for creating
mini-batches.

.. autoclass:: fairseq.data.FairseqDataset
    :members:

.. autoclass:: fairseq.data.LanguagePairDataset
    :members:

.. autoclass:: fairseq.data.MonolingualDataset
    :members:

**Helper Datasets**

These datasets wrap other :class:`fairseq.data.FairseqDataset` instances and
provide additional functionality:

.. autoclass:: fairseq.data.BacktranslationDataset
    :members:

.. autoclass:: fairseq.data.ConcatDataset
    :members:

.. autoclass:: fairseq.data.ResamplingDataset
    :members:

.. autoclass:: fairseq.data.RoundRobinZipDatasets
    :members:

.. autoclass:: fairseq.data.TransformEosDataset
    :members:

Dictionary
----------

.. autoclass:: fairseq.data.Dictionary
    :members:


Iterators
---------

.. autoclass:: fairseq.data.CountingIterator
    :members:

.. autoclass:: fairseq.data.EpochBatchIterator
    :members:

.. autoclass:: fairseq.data.GroupedIterator
    :members:

.. autoclass:: fairseq.data.ShardedIterator
    :members:
# NOTE(review): looks like a docutils configuration section; option-limit
# is set to 0 here — confirm the intended effect against the docutils docs.
[writers]
option-limit = 0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment