Commit 6fd0b406 authored by zihanl

merge with main branch

parents 492fdf83 60750922
# coding=utf-8
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Setup for pip package."""
import os
import sys
import setuptools

if sys.version_info < (3,):
    raise Exception("Python 2 is not supported by Megatron.")

from megatron.package_info import (
    __description__,
    __contact_names__,
    __url__,
    __download_url__,
    __keywords__,
    __license__,
    __package_name__,
    __version__,
)

with open("README.md", "r") as fh:
    long_description = fh.read()


###############################################################################
#                             Dependency Loading                              #
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #

def req_file(filename):
    with open(filename) as f:
        content = f.readlines()
    return [x.strip() for x in content]

install_requires = req_file("requirements.txt")

setuptools.setup(
    name=__package_name__,
    # Versions should comply with PEP440. For a discussion on single-sourcing
    # the version across setup.py and the project code, see
    # https://packaging.python.org/en/latest/single_source_version.html
    version=__version__,
    description=__description__,
    long_description=long_description,
    long_description_content_type="text/markdown",
    # The project's main homepage.
    url=__url__,
    author=__contact_names__,
    maintainer=__contact_names__,
    # The licence under which the project is released
    license=__license__,
    classifiers=[
        'Intended Audience :: Developers',
        'Intended Audience :: Science/Research',
        'Intended Audience :: Information Technology',
        # Indicate what your project relates to
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Topic :: Software Development :: Libraries :: Python Modules',
        # Supported python versions
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        # Additional Setting
        'Environment :: Console',
        'Natural Language :: English',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.6',
    packages=setuptools.find_packages(),
    install_requires=install_requires,
    # Add in any packaged data.
    include_package_data=True,
    zip_safe=False,
    # PyPI package information.
    keywords=__keywords__
)
@@ -25,6 +25,7 @@ from megatron import get_timers
 from megatron import mpu
 from megatron.checkpointing import load_checkpoint
 from megatron.checkpointing import save_checkpoint
+from megatron.model import ModelType
 from megatron.training import evaluate_and_print_results
 from megatron.training import setup_model_and_optimizer
 from megatron.training import train_step
@@ -253,6 +254,7 @@ def _train(model, optimizer, lr_scheduler, forward_step,
 def finetune(train_valid_datasets_provider, model_provider,
+             model_type=ModelType.encoder_or_decoder,
              forward_step=_cross_entropy_forward_step,
              end_of_epoch_callback_provider=None,
              task_collate_fn=None):
@@ -282,7 +284,7 @@ def finetune(train_valid_datasets_provider, model_provider,
     # Build model, optimizer and learning rate scheduler.
     timers('model and optimizer').start()
-    model, optimizer, lr_scheduler = setup_model_and_optimizer(model_provider)
+    model, optimizer, lr_scheduler = setup_model_and_optimizer(model_provider, model_type)
     timers('model and optimizer').stop()

     # If pretrained checkpoint is provided and we have not trained for
......
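The added `model_type` keyword defaults to `ModelType.encoder_or_decoder` and is threaded through to `setup_model_and_optimizer`, so existing callers of `finetune` are unaffected. As a minimal sketch of what a caller looks like after this change (the two provider functions are hypothetical stand-ins, and the module path is assumed from this diff's context, not confirmed by it):

```python
from megatron.model import ModelType
from tasks.finetune_utils import finetune  # module path assumed from this diff

def train_valid_datasets_provider():
    """Hypothetical: build and return the task's train/valid datasets."""
    ...

def model_provider(pre_process=True, post_process=True):
    """Hypothetical: build and return the task's model."""
    ...

# Omitting model_type keeps the old behavior (encoder_or_decoder);
# tasks with other architectures can now pass a different ModelType.
finetune(train_valid_datasets_provider, model_provider,
         model_type=ModelType.encoder_or_decoder)
```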
# Multi-Stage Prompting for Knowledgeable Dialogue Generation
We present the steps to run our multi-stage dialogue prompting (MSDP) approach, as well as the finetuning-based baselines (FKG and FCM).
## MSDP
### Knowledge Generation
### Response Generation
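The two stage headings above summarize the pipeline: the model is first prompted to generate a knowledge sentence from the dialogue history, then prompted again to generate a response grounded in that knowledge. A minimal illustrative sketch, assuming a hypothetical `generate_text` callable that wraps the prompted GPT model (the prompt wording here is ours, not the paper's):

```python
def msdp_respond(dialogue_history, generate_text):
    """Two-stage prompting: generate knowledge, then a grounded response.

    `generate_text` is a hypothetical callable wrapping a prompted GPT model.
    """
    # Stage 1: knowledge generation from the dialogue context.
    knowledge = generate_text(dialogue_history + "\nRelevant knowledge:")

    # Stage 2: response generation conditioned on the generated knowledge.
    return generate_text(dialogue_history +
                         "\nKnowledge: " + knowledge +
                         "\nResponse:")
```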
@@ -141,9 +141,9 @@ def evaluate_f1(guess_file, answer_file):
 def main():
     args = get_args()

-    if 'ppl' in args.task:
+    if 'PPL' in args.task:
         evaluate_ppl(test_dataset_provider, model_provider, forward_step)
-    elif 'f1' in args.task:
+    elif 'F1' in args.task:
         evaluate_f1(args.guess_file, args.answer_file)
@@ -200,7 +200,7 @@ def run_generation(model_provider):
 def main():
     args = get_args()

-    if "finetune" in args.task:
+    if "FINETUNE" in args.task:
         finetune(train_valid_datasets_provider, model_provider, \
                  forward_step=forward_step)
     else:
......
@@ -59,10 +59,7 @@ def forward_step(model, tokens, position_ids, attention_mask, tokentype_ids,
         model, (torchDDP, LocalDDP, Float16Module))
     unwrapped_model.set_input_tensor(input_tensor)

     output_tensor = model(tokens, position_ids, attention_mask,
-                          tokentype_ids=tokentype_ids,
-                          layer_past=layer_past,
-                          get_key_value=get_key_value,
-                          forward_method_parallel_output=forward_method_parallel_output)
+                          tokentype_ids=tokentype_ids)

     if get_key_value:
         output_tensor, layer_past = output_tensor
......
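For context on the two lines kept above: Megatron's DDP and fp16 wrappers do not expose `set_input_tensor`, which pipeline parallelism uses to feed a stage the activations produced by the previous stage, so the raw module has to be unwrapped first. A generic sketch of the unwrapping idea (simplified; Megatron's own `unwrap_model` also handles lists of model chunks):

```python
def unwrap_model(model, wrapper_classes):
    # Peel off wrapper layers (DDP, fp16, ...) until the raw module remains.
    while isinstance(model, wrapper_classes):
        model = model.module
    return model
```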
@@ -140,11 +140,11 @@ if __name__ == '__main__':
         from orqa.evaluate_orqa import main
     elif args.task in ['RET-FINETUNE-NQ']:
         from orqa.supervised.finetune import main
-    elif args.task == 'knwl-dialo-prompt':
+    elif args.task == 'KNWL-DIALO-PROMPT':
         from knwl_dialo.prompt import main
-    elif args.task in ['knwl-dialo-finetune', 'knwl-dialo-gen']:
+    elif args.task in ['KNWL-DIALO-FINETUNE', 'KNWL-DIALO-GEN']:
         from knwl_dialo.finetune import main
-    elif args.task == 'knwl-dialo-eval-f1':
+    elif args.task == 'KNWL-DIALO-EVAL-F1':
         from knwl_dialo.evaluate import main
     else:
         raise NotImplementedError('Task {} is not implemented.'.format(
......
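The renaming to upper-case is more than style: the per-module entry points seen earlier select behavior with case-sensitive substring checks such as `'PPL' in args.task` and `'FINETUNE' in args.task`, so the task names registered here must be upper-case for those checks to fire. A tiny illustration (the full PPL task name is a hypothetical example; only the F1 variant appears in this diff):

```python
task = 'KNWL-DIALO-EVAL-PPL'   # hypothetical name, mirroring KNWL-DIALO-EVAL-F1
assert 'PPL' in task           # the renamed upper-case check matches
assert 'ppl' not in task       # the old lower-case check would silently miss it
```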
@@ -205,7 +205,7 @@ def main():
             args.task))

     # Set up model and load checkpoint.
-    model = get_model(get_model_provider(eval_metric))
+    model = get_model(get_model_provider(eval_metric), wrap_with_ddp=False)

     if args.load is not None:
         _ = load_checkpoint(model, None, None)
......
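Here `wrap_with_ddp=False` asks `get_model` to return the bare model list without the DistributedDataParallel wrapper, which an evaluation-only path never needs (there is no gradient synchronization to do). Restated as a minimal sketch of the eval-time setup from the lines above (`eval_metric` and `args` come from the surrounding file):

```python
# Build the model without DDP wrapping; evaluation does no gradient sync.
model = get_model(get_model_provider(eval_metric), wrap_with_ddp=False)

if args.load is not None:
    # The two None arguments are the optimizer and LR scheduler,
    # which are not restored for evaluation.
    _ = load_checkpoint(model, None, None)
```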
@@ -20,7 +20,7 @@ python blacklist_urls.py <path to the downloaded deduplicated URLs> <filename for
 4. Merge the contents into one loose json file with 1 json per newline of the format `{'text': text, 'url': unique_url}`. It is important for the url to be unique.

-# Prepare the data for GPT-2 training:
+# Prepare the data for GPT training:
 1. Perform ftfy, English detection and remove documents with fewer than 128 tokens. This step can be sharded and run on shards.
 ```
@@ -50,7 +50,7 @@ shuf <cleaned deduped data file> -o train_data.json
 To deduplicate the downstream tasks (e.g. lambada, squad) from the training dataset, we run the following command.
 ```
-python filter_ngrams.py --tasks <name of he task, e.g. lambada, squad> --dedup-dataset <training dataset to deduplicate> <json key> --output <output training dataset>
+python filter_ngrams.py --tasks <name of the task, e.g. lambada, squad> --dedup-dataset <training dataset to deduplicate> <json key> --output <output training dataset>
 ```
 We use 13-grams by default for the deduplication. When we find a 13-gram match in a training document, we split the document into two pieces and remove the 13-gram along with 200 characters from both sides of the match. We also remove any resulting piece with fewer than 200 characters, and drop documents that get split more than 10 times. These parameters can be changed using the corresponding arguments.
......
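To make the splitting rule above concrete, here is a minimal sketch of removing a single matched 13-gram under the stated parameters (200-character margins, 200-character minimum piece length); it illustrates the described rule and is not the actual code in `filter_ngrams.py` (the 10-splits-per-document cap is omitted for brevity):

```python
def remove_ngram_match(doc, match_start, match_end, margin=200, min_len=200):
    """Split `doc` around a matched n-gram, dropping the match plus
    `margin` characters on each side; keep only pieces long enough."""
    left = doc[:max(match_start - margin, 0)]
    right = doc[match_end + margin:]
    return [piece for piece in (left, right) if len(piece) >= min_len]

# Example: a 13-gram matched at characters 500..580 of a 2000-char document.
pieces = remove_ngram_match("x" * 2000, 500, 580)
print([len(p) for p in pieces])  # [300, 1220]
```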
@@ -14,69 +14,45 @@
 # limitations under the License.

 """Sample Generate GPT"""
 import os
 import sys
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
                                              os.path.pardir)))
+import socket
 from megatron import get_args
 from megatron import print_rank_0
-from megatron import get_tokenizer
 from megatron import mpu
 from megatron.checkpointing import load_checkpoint
 from megatron.initialize import initialize_megatron
 from megatron.model import GPTModel
 from megatron.training import get_model
-from megatron.text_generation_utils import generate_and_write_samples_unconditional
-from megatron.text_generation_utils import generate_samples_input_from_file
-from megatron.text_generation_utils import generate_samples_interactive
+from megatron.text_generation_server import MegatronServer
+from megatron.text_generation import generate_and_post_process
+import torch

 def model_provider(pre_process=True, post_process=True):
     """Build the model."""

     print_rank_0('building GPT model ...')
-    model = GPTModel(num_tokentypes=0, parallel_output=False,
-                     pre_process=pre_process, post_process=post_process)
+    model = GPTModel(num_tokentypes=0, parallel_output=False, pre_process=pre_process, post_process=post_process)

     return model

 def add_text_generate_args(parser):
-    """Text generation arguments."""
     group = parser.add_argument_group(title='text generation')

     group.add_argument("--temperature", type=float, default=1.0,
                        help='Sampling temperature.')
-    group.add_argument("--greedy", action='store_true', default=False,
-                       help='Use greedy sampling.')
     group.add_argument("--top_p", type=float, default=0.0,
                        help='Top p sampling.')
     group.add_argument("--top_k", type=int, default=0,
                        help='Top k sampling.')
     group.add_argument("--out-seq-length", type=int, default=1024,
                        help='Size of the output generated text.')
-    group.add_argument("--sample-input-file", type=str, default=None,
-                       help='Get input from file instead of interactive mode, '
-                       'each line is an input.')
-    group.add_argument("--sample-output-file", type=str, default=None,
-                       help='Output file got from --sample-input-file')
-    group.add_argument("--num-samples", type=int, default=0,
-                       help='Number of samples to generate unconditionally, '
-                       'defaults to 0 and interactive conditional sampling')
-    group.add_argument("--genfile", type=str,
-                       help='Output file when generating unconditionally')
-    group.add_argument("--recompute", action='store_true',
-                       help='During generation recompute all attention '
-                       'instead of using previously computed keys/values.')
     return parser

-def main():
-    """Main program."""
+if __name__ == "__main__":
     initialize_megatron(extra_args_provider=add_text_generate_args,
                         args_defaults={'tokenizer_type': 'GPT2BPETokenizer',
                                        'no_load_rng': True,
@@ -86,27 +62,20 @@ def main():
     if args.num_layers_per_virtual_pipeline_stage is not None:
         print("Interleaved pipeline schedule is not yet supported for text generation.")
         exit()

-    # Set up model and load checkpoint.
-    model = get_model(model_provider)
+    # Set up model and load checkpoint
+    model = get_model(model_provider, wrap_with_ddp=False)

     if args.load is not None:
         _ = load_checkpoint(model, None, None)

     assert len(model) == 1, "Above condition should have caught this"
     model = model[0]

-    # Generate samples.
-    if args.num_samples == 0:
-        if args.sample_input_file != None:
-            args.micro_batch_size = 1
-            generate_samples_input_from_file(model)
-        else:
-            generate_samples_interactive(model)
-    else:
-        generate_and_write_samples_unconditional(model)
-
-if __name__ == "__main__":
-    main()
+    if mpu.is_pipeline_first_stage() and mpu.get_tensor_model_parallel_rank() == 0:
+        server = MegatronServer(model)
+        server.run("0.0.0.0")
+
+    while True:
+        choice = torch.cuda.LongTensor(1)
+        torch.distributed.broadcast(choice, 0)
+        if choice[0].item() == 0:
+            generate_and_post_process(model)
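The control flow that replaces the old sampling loops is worth spelling out: only tensor-parallel rank 0 of the first pipeline stage runs the HTTP server, while every other rank parks in the `while True` loop waiting on a broadcast. When a request arrives, the server side (inside `MegatronServer`, not shown in this diff) is expected to broadcast the matching command value so that every rank enters `generate_and_post_process` together, since generation is a collective operation across the model-parallel group. A minimal sketch of the same handshake pattern in generic PyTorch (not Megatron's exact code):

```python
import torch
import torch.distributed as dist

GENERATE = 0  # command id broadcast from rank 0 to the other ranks

def serve_loop(model, is_server_rank, handle_request, run_generation):
    if is_server_rank:
        while True:
            request = handle_request()               # e.g. blocks on HTTP
            cmd = torch.cuda.LongTensor([GENERATE])
            dist.broadcast(cmd, src=0)               # wake the other ranks
            run_generation(model, request)
    else:
        while True:
            cmd = torch.cuda.LongTensor(1)
            dist.broadcast(cmd, src=0)               # block until rank 0 speaks
            if cmd[0].item() == GENERATE:
                run_generation(model, None)          # join the collective
```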
# coding=utf-8
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import sys
import urllib.request


class PutRequest(urllib.request.Request):
    '''Request subclass that issues PUT instead of urllib's default method.'''

    def get_method(self, *args, **kwargs):
        return 'PUT'


if __name__ == "__main__":
    url = sys.argv[1]
    while True:
        sentence = input("Enter prompt: ")
        tokens_to_generate = int(input("Enter number of tokens to generate: "))
        data = json.dumps({"prompts": [sentence],
                           "tokens_to_generate": tokens_to_generate})
        req = PutRequest(url, data.encode('utf-8'),
                         {'Content-Type': 'application/json'})
        response = urllib.request.urlopen(req)
        resp_sentences = json.load(response)
        print("Megatron Response: ")
        print(resp_sentences["text"][0])
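Assuming the server from the previous file is listening (it binds to "0.0.0.0"; its port is defined inside `MegatronServer` and not shown in this diff), the client takes the server URL as its only command-line argument and loops over interactive prompts. The JSON round trip, reconstructed from the keys the client sends and reads, looks like this (the response text is a made-up example):

```python
# What the client PUTs to the server ...
request_body = {"prompts": ["Megatron is"], "tokens_to_generate": 8}
# ... and the shape of the reply it expects back.
response_body = {"text": ["Megatron is a large transformer language model ..."]}
print(response_body["text"][0])
```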