"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "3768d4d77ced5142430c30108ccb0dcd52f9d151"
Commit 62b5498b authored by Myle Ott, committed by Facebook Github Bot
Browse files

Update GPT-2 BPE

Summary: Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/749

Differential Revision: D16410984

Pulled By: myleott

fbshipit-source-id: 7698df46b8a179afccb287990f9705358690454a
parent 5f78106a
...@@ -9,6 +9,10 @@ from fairseq import file_utils ...@@ -9,6 +9,10 @@ from fairseq import file_utils
from fairseq.data.encoders import register_bpe from fairseq.data.encoders import register_bpe
DEFAULT_ENCODER_JSON = 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json'
DEFAULT_VOCAB_BPE = 'https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe'
@register_bpe('gpt2') @register_bpe('gpt2')
class GPT2BPE(object): class GPT2BPE(object):
...@@ -16,16 +20,20 @@ class GPT2BPE(object): ...@@ -16,16 +20,20 @@ class GPT2BPE(object):
def add_args(parser): def add_args(parser):
# fmt: off # fmt: off
parser.add_argument('--gpt2-encoder-json', type=str, parser.add_argument('--gpt2-encoder-json', type=str,
default='https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json', default=DEFAULT_ENCODER_JSON,
help='path to encoder.json') help='path to encoder.json')
parser.add_argument('--gpt2-vocab-bpe', type=str, parser.add_argument('--gpt2-vocab-bpe', type=str,
default='https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe', default=DEFAULT_VOCAB_BPE,
help='path to vocab.bpe') help='path to vocab.bpe')
# fmt: on # fmt: on
def __init__(self, args): def __init__(self, args):
encoder_json = file_utils.cached_path(args.gpt2_encoder_json) encoder_json = file_utils.cached_path(
vocab_bpe = file_utils.cached_path(args.gpt2_vocab_bpe) getattr(args, 'gpt2_encoder_json', DEFAULT_ENCODER_JSON)
)
vocab_bpe = file_utils.cached_path(
getattr(args, 'gpt2_vocab_bpe', DEFAULT_VOCAB_BPE)
)
self.bpe = get_encoder(encoder_json, vocab_bpe) self.bpe = get_encoder(encoder_json, vocab_bpe)
def encode(self, x: str) -> str: def encode(self, x: str) -> str:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment