Add documentation

6381cc97 · Myle Ott · 0e101e9c · 6381cc97 · 6381cc97 · 6381cc97
Commit 6381cc97 authored Sep 03, 2018 by Myle Ott
Hide whitespace changes
Inline Side-by-side

Showing 4 changed files with 18 additions and 5 deletions

interactive.py +3 -0

preprocess.py +4 -3

score.py +8 -2

train.py +3 -0

No files found.
--- a/interactive.py
+++ b/interactive.py
@@ -5,6 +5,9 @@
 # This source code is licensed under the license found in the LICENSE file in
 # the root directory of this source tree. An additional grant of patent rights
 # can be found in the PATENTS file in the same directory.
+"""
+Translate raw text with a trained model. Batches data on-the-fly.
+"""

 from collections import namedtuple
 import numpy as np

--- a/preprocess.py
+++ b/preprocess.py
@@ -5,7 +5,9 @@
 # This source code is licensed under the license found in the LICENSE file in
 # the root directory of this source tree. An additional grant of patent rights
 # can be found in the PATENTS file in the same directory.
-#
+"""
+Data pre-processing: build vocabularies and binarize training data.
+"""

 import argparse
 from itertools import zip_longest
@@ -17,8 +19,7 @@ from fairseq.tokenizer import Tokenizer, tokenize_line


 def get_parser():
-    parser = argparse.ArgumentParser(
-        description='Data pre-processing: Create dictionary and store data in binary format')
+    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--source-lang', default=None, metavar='SRC', help='source language')
    parser.add_argument('-t', '--target-lang', default=None, metavar='TARGET', help='target language')
    parser.add_argument('--trainpref', metavar='FP', default=None, help='train file prefix')

--- a/score.py
+++ b/score.py
@@ -5,7 +5,9 @@
 # This source code is licensed under the license found in the LICENSE file in
 # the root directory of this source tree. An additional grant of patent rights
 # can be found in the PATENTS file in the same directory.
-#
+"""
+BLEU scoring of generated translations against reference translations.
+"""

 import argparse
 import os
@@ -15,7 +17,7 @@ from fairseq import bleu, tokenizer
 from fairseq.data import dictionary


-def main():
+def get_parser():
    parser = argparse.ArgumentParser(description='Command-line script for BLEU scoring.')
    parser.add_argument('-s', '--sys', default='-', help='system output')
    parser.add_argument('-r', '--ref', required=True, help='references')
@@ -23,7 +25,11 @@ def main():
                        type=int, help='consider ngrams up to this order')
    parser.add_argument('--ignore-case', action='store_true',
                        help='case-insensitive scoring')
+    return parser

+
+def main():
+    parser = get_parser()
    args = parser.parse_args()
    print(args)


--- a/train.py
+++ b/train.py
@@ -5,6 +5,9 @@
 # This source code is licensed under the license found in the LICENSE file in
 # the root directory of this source tree. An additional grant of patent rights
 # can be found in the PATENTS file in the same directory.
+"""
+Train a new model on one or across multiple GPUs.
+"""

 import collections
 import itertools