Commit 9c3bb5c6 authored by Peng-Jen Chen's avatar Peng-Jen Chen Committed by Facebook Github Bot
Browse files

Better explain the inference argument format of multilingual translation

Summary:
In https://github.com/pytorch/fairseq/issues/656, people are often confused about how to set multilingual translation parameters at inference time.

This diff adds more checks to ensure that the arguments (`--lang-pairs`, `--encoder-langtok`, `--decoder-langtok`) loaded from the checkpoint are consistent with the arguments specified on the generate/interactive command line.
We also add a section to the examples page explaining how to set these arguments.

Reviewed By: myleott

Differential Revision: D15682169

fbshipit-source-id: 64e6db94cd72ea7ce2d0aa1067c9c2dcd3b8a2ac
parent 392fce8a
...@@ -253,3 +253,9 @@ $ cat iwslt17.test.${SRC}-en.${SRC}.bpe | fairseq-interactive data-bin/iwslt17.d ...@@ -253,3 +253,9 @@ $ cat iwslt17.test.${SRC}-en.${SRC}.bpe | fairseq-interactive data-bin/iwslt17.d
$ grep ^H iwslt17.test.${SRC}-en.en.sys | cut -f3 \ $ grep ^H iwslt17.test.${SRC}-en.en.sys | cut -f3 \
| sacrebleu --test-set iwslt17 --language-pair ${SRC}-en | sacrebleu --test-set iwslt17 --language-pair ${SRC}-en
``` ```
### Argument format during inference
During inference it is required to specify a single `--source-lang` and
`--target-lang`, which indicates the inference language direction.
`--lang-pairs`, `--encoder-langtok`, `--decoder-langtok` have to be set to
the same values as during training.
...@@ -59,7 +59,9 @@ class MultilingualTranslationTask(FairseqTask): ...@@ -59,7 +59,9 @@ class MultilingualTranslationTask(FairseqTask):
implements the `FairseqMultiModel` interface. implements the `FairseqMultiModel` interface.
During inference it is required to specify a single `--source-lang` and During inference it is required to specify a single `--source-lang` and
`--target-lang`, instead of `--lang-pairs`. `--target-lang`, which indicates the inference language direction.
`--lang-pairs`, `--encoder-langtok`, `--decoder-langtok` have to be set to
the same value as training.
""" """
@staticmethod @staticmethod
...@@ -128,6 +130,8 @@ class MultilingualTranslationTask(FairseqTask): ...@@ -128,6 +130,8 @@ class MultilingualTranslationTask(FairseqTask):
utils.deprecation_warning('--lazy-load is deprecated, please use --dataset-impl=lazy') utils.deprecation_warning('--lazy-load is deprecated, please use --dataset-impl=lazy')
args.dataset_impl = 'lazy' args.dataset_impl = 'lazy'
if args.lang_pairs is None:
raise ValueError('--lang-pairs is required. List all the language pairs in the training objective.')
args.lang_pairs = args.lang_pairs.split(',') args.lang_pairs = args.lang_pairs.split(',')
sorted_langs = sorted(list({x for lang_pair in args.lang_pairs for x in lang_pair.split('-')})) sorted_langs = sorted(list({x for lang_pair in args.lang_pairs for x in lang_pair.split('-')}))
if args.source_lang is not None or args.target_lang is not None: if args.source_lang is not None or args.target_lang is not None:
...@@ -244,6 +248,21 @@ class MultilingualTranslationTask(FairseqTask): ...@@ -244,6 +248,21 @@ class MultilingualTranslationTask(FairseqTask):
) )
def build_model(self, args): def build_model(self, args):
def check_args():
    """Verify that the task's arguments are consistent with the model
    arguments loaded from the checkpoint.

    Compares `--lang-pairs`, `--encoder-langtok` and `--decoder-langtok`
    between ``self.args`` (command-line/task args) and ``args`` (checkpoint
    model args), collecting every mismatch so the user sees all required
    corrections in a single ValueError.

    Raises:
        ValueError: if any of the three arguments differ.
    """
    messages = []
    # Order-insensitive comparison: the same pairs in a different order are fine.
    if set(self.args.lang_pairs) != set(args.lang_pairs):
        messages.append('--lang-pairs should include all the language pairs {}.'.format(args.lang_pairs))
    if self.args.encoder_langtok != args.encoder_langtok:
        messages.append('--encoder-langtok should be {}.'.format(args.encoder_langtok))
    if self.args.decoder_langtok != args.decoder_langtok:
        # Fix: the old format string produced a double space ("should  be set")
        # when the flag was required; attach the space to the inserted word instead.
        messages.append('--decoder-langtok should {}be set.'.format('' if args.decoder_langtok else 'not '))
    if messages:
        raise ValueError(' '.join(messages))
# Check if task args are consistent with model args
check_args()
from fairseq import models from fairseq import models
model = models.build_model(args, self) model = models.build_model(args, self)
if not isinstance(model, FairseqMultiModel): if not isinstance(model, FairseqMultiModel):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment