Commit 24684cbb authored by mshoeybi

added BOS

parent ff2f0a05
@@ -18,9 +18,48 @@
 import torch
 from megatron import mpu
 from .communication import broadcast_float_list
 from .generation import generate_tokens_probs_and_return_on_first_stage
-from .tokenization import tokenize_prompts
+from .tokenization import (
+    tokenize_prompts,
+    detokenize_generations)
+
+
+def generate_and_post_process(model,
+                              prompts=None,
+                              tokens_to_generate=0,
+                              return_output_log_probs=False,
+                              return_all_log_probs=False,
+                              temperature=1.0,
+                              add_BOS=False):
+    """TO DO ..."""
+
+    # Main inference.
+    tokens, lengths, output_log_probs, all_log_probs = generate(
+        model,
+        prompts=prompts,
+        tokens_to_generate=tokens_to_generate,
+        return_output_log_probs=return_output_log_probs,
+        return_all_log_probs=return_all_log_probs,
+        temperature=temperature,
+        add_BOS=add_BOS)
+
+    # Only post-process on first stage.
+    if mpu.is_pipeline_first_stage():
+        tokens, prompts_plus_generations, prompts_plus_generations_segments = \
+            detokenize_generations(tokens, lengths, True)
+
+        if return_output_log_probs:
+            output_log_probs = output_log_probs.cpu().numpy().tolist()
+        if return_all_log_probs:
+            all_log_probs = all_log_probs.cpu().numpy()  # .tolist()
+
+        return prompts_plus_generations, prompts_plus_generations_segments, \
+            output_log_probs, all_log_probs, tokens
+
+    return None
+
+
 def generate(model,
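
A minimal usage sketch of the new entry point (not part of the commit): the model handle, prompt text, and generation length below are placeholders, and it assumes the usual Megatron model-parallel setup is already initialized.

result = generate_and_post_process(model,                       # an already-built Megatron GPT model (assumed)
                                   prompts=["Megatron-LM is"],  # placeholder prompt
                                   tokens_to_generate=16,
                                   return_output_log_probs=True,
                                   temperature=1.0,
                                   add_BOS=True)                # new flag added by this commit

# Post-processed output is only returned on the first pipeline stage;
# all other ranks get None.
if result is not None:
    prompts_plus_generations, segments, output_log_probs, all_log_probs, tokens = result
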
@@ -28,24 +67,27 @@ def generate(model,
              tokens_to_generate=0,
              return_output_log_probs=False,
              return_all_log_probs=False,
-             temperature=1.0):
+             temperature=1.0,
+             add_BOS=False):
     """TO DO ..."""

     # Make sure input params are available to all ranks.
     values = [tokens_to_generate, return_output_log_probs,
-              return_all_log_probs, temperature]
-    values_float_tensor = broadcast_float_list(4, float_list=values)
+              return_all_log_probs, temperature, add_BOS]
+    values_float_tensor = broadcast_float_list(5, float_list=values)
     tokens_to_generate = int(values_float_tensor[0].item())
     return_output_log_probs = bool(values_float_tensor[1].item())
     return_all_log_probs = bool(values_float_tensor[2].item())
-    temperature = values_float_tensor[2].item()
+    temperature = values_float_tensor[3].item()
+    add_BOS = bool(values_float_tensor[4].item())

     # Tokenize prompts and get the batch.
     # Note that these tensors are broadcasted to all ranks.
     if torch.distributed.get_rank() == 0:
         assert prompts is not None
         assert tokens_to_generate > 0

     context_tokens_tensor, context_length_tensor = tokenize_prompts(
-        prompts=prompts, tokens_to_generate=tokens_to_generate)
+        prompts=prompts, tokens_to_generate=tokens_to_generate, add_BOS=add_BOS)

     # Main inference function.
     # Note that the outputs are available on the first stage.
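
generate() above follows a small pattern worth calling out: all scalar arguments, booleans included, are packed into one float list, broadcast once from rank 0, and unpacked on every rank with int()/bool() casts. The helper below is only a sketch of what broadcast_float_list might look like; the real implementation lives in .communication, is not part of this diff, and may differ. It assumes torch.distributed is initialized and CUDA is available.

import torch

def broadcast_float_list_sketch(size, float_list=None, src=0):
    """Pack scalars into a float tensor on the source rank and broadcast it
    so that every rank can read back identical parameter values."""
    if torch.distributed.get_rank() == src:
        tensor = torch.tensor(float_list, dtype=torch.float32, device='cuda')
    else:
        tensor = torch.empty(size, dtype=torch.float32, device='cuda')
    torch.distributed.broadcast(tensor, src)
    return tensor
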
@@ -57,7 +57,8 @@ def detokenize_generations(tokens_gpu_tensor,
     return tokens, prompts_plus_generations


-def tokenize_prompts(prompts=None, tokens_to_generate=None, rank=0):
+def tokenize_prompts(prompts=None, tokens_to_generate=None,
+                     add_BOS=None, rank=0):
     """Tokenize prompts and make them available on all ranks."""

     # On all ranks set to None so we can pass them to functions
@@ -71,7 +72,7 @@ def tokenize_prompts(prompts=None, tokens_to_generate=None, rank=0):
         assert tokens_to_generate is not None
         # Tensor of tokens padded and their unpadded length.
         prompts_tokens_cuda_long_tensor, prompts_length_cuda_long_tensor = \
-            _tokenize_prompts_and_batch(prompts, tokens_to_generate)
+            _tokenize_prompts_and_batch(prompts, tokens_to_generate, add_BOS)
         # We need the sizes of these tensors for the broadcast
         sizes_list = [prompts_tokens_cuda_long_tensor.size(0),  # Batch size
                       prompts_tokens_cuda_long_tensor.size(1)]  # Sequence length
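
The comment about tensor sizes hints at the next step, which lives outside this hunk: the padded token tensor only exists on rank 0, so the other ranks must learn its (batch, sequence) shape before they can allocate a buffer to receive it. A hedged sketch of that two-step broadcast, not taken from the repository and assuming an initialized torch.distributed group:

import torch

def broadcast_tokens_sketch(tokens=None, src=0):
    """Broadcast a 2D long tensor that only exists on the source rank:
    first its sizes, then the tensor itself."""
    sizes = torch.zeros(2, dtype=torch.long, device='cuda')
    if torch.distributed.get_rank() == src:
        sizes[0], sizes[1] = tokens.size(0), tokens.size(1)  # batch size, sequence length
    torch.distributed.broadcast(sizes, src)
    if torch.distributed.get_rank() != src:
        tokens = torch.empty(int(sizes[0]), int(sizes[1]),
                             dtype=torch.long, device='cuda')
    torch.distributed.broadcast(tokens, src)
    return tokens
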
@@ -91,7 +92,7 @@ def tokenize_prompts(prompts=None, tokens_to_generate=None, rank=0):
     return prompts_tokens_cuda_long_tensor, prompts_length_cuda_long_tensor


-def _tokenize_prompts_and_batch(prompts, tokens_to_generate):
+def _tokenize_prompts_and_batch(prompts, tokens_to_generate, add_BOS):
     """Given a set of prompts and number of tokens to generate:
        - tokenize prompts
        - set the sequence length to be the max of length of prompts
@@ -102,7 +103,11 @@ def _tokenize_prompts_and_batch(prompts, tokens_to_generate):

     # Tokenize all the prompts.
     tokenizer = get_tokenizer()
-    prompts_tokens = [tokenizer.tokenize(prompt) for prompt in prompts]
+    if add_BOS:
+        prompts_tokens = [[tokenizer.eod] + tokenizer.tokenize(prompt)
+                          for prompt in prompts]
+    else:
+        prompts_tokens = [tokenizer.tokenize(prompt) for prompt in prompts]

     # Now we have a list of lists of tokens where each list has a different
     # size. We want to extend this list to:
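
A toy illustration of the new branch (not from the commit): the diff reuses the tokenizer's end-of-document id as the BOS token by prepending it to each prompt's token list before padding. The tokenizer class and token ids below are invented for the example:

class _ToyTokenizer:                       # stand-in for get_tokenizer()
    eod = 0                                # pretend BOS/EOD id
    def tokenize(self, text):
        return [ord(c) for c in text]      # fake per-character token ids

tokenizer = _ToyTokenizer()
prompts = ["hi", "ok"]
add_BOS = True

if add_BOS:
    prompts_tokens = [[tokenizer.eod] + tokenizer.tokenize(prompt)
                      for prompt in prompts]
else:
    prompts_tokens = [tokenizer.tokenize(prompt) for prompt in prompts]

print(prompts_tokens)  # [[0, 104, 105], [0, 111, 107]]
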