"src/vscode:/vscode.git/clone" did not exist on "bda825f910c3c4c3c33de4530a911cbb62899f8e"
Commit 24684cbb authored by mshoeybi

added BOS

parent ff2f0a05
@@ -18,9 +18,48 @@
 import torch

+from megatron import mpu
 from .communication import broadcast_float_list
 from .generation import generate_tokens_probs_and_return_on_first_stage
-from .tokenization import tokenize_prompts
+from .tokenization import (
+    tokenize_prompts,
+    detokenize_generations)
+
+
+def generate_and_post_process(model,
+                              prompts=None,
+                              tokens_to_generate=0,
+                              return_output_log_probs=False,
+                              return_all_log_probs=False,
+                              temperature=1.0,
+                              add_BOS=False):
+    """TO DO ..."""
+
+    # Main inference.
+    tokens, lengths, output_log_probs, all_log_probs = generate(
+        model,
+        prompts=prompts,
+        tokens_to_generate=tokens_to_generate,
+        return_output_log_probs=return_output_log_probs,
+        return_all_log_probs=return_all_log_probs,
+        temperature=temperature,
+        add_BOS=add_BOS)
+
+    # Only post-process on first stage.
+    if mpu.is_pipeline_first_stage():
+        tokens, prompts_plus_generations, prompts_plus_generations_segments = \
+            detokenize_generations(tokens, lengths, True)
+
+        if return_output_log_probs:
+            output_log_probs = output_log_probs.cpu().numpy().tolist()
+        if return_all_log_probs:
+            all_log_probs = all_log_probs.cpu().numpy() #.tolist()
+
+        return prompts_plus_generations, prompts_plus_generations_segments, \
+            output_log_probs, all_log_probs, tokens
+
+    return None
+
+
 def generate(model,
@@ -28,24 +67,27 @@ def generate(model,
              tokens_to_generate=0,
              return_output_log_probs=False,
              return_all_log_probs=False,
-             temperature=1.0):
+             temperature=1.0,
+             add_BOS=False):
     """TO DO ..."""

     # Make sure input params are avaialble to all ranks.
     values = [tokens_to_generate, return_output_log_probs,
-              return_all_log_probs, temperature]
-    values_float_tensor = broadcast_float_list(4, float_list=values)
+              return_all_log_probs, temperature, add_BOS]
+    values_float_tensor = broadcast_float_list(5, float_list=values)
     tokens_to_generate = int(values_float_tensor[0].item())
     return_output_log_probs = bool(values_float_tensor[1].item())
     return_all_log_probs = bool(values_float_tensor[2].item())
-    temperature = values_float_tensor[2].item()
+    temperature = values_float_tensor[3].item()
+    add_BOS = bool(values_float_tensor[4].item())

     # Tokenize prompts and get the batch.
     # Note that these tensors are broadcaseted to all ranks.
     if torch.distributed.get_rank() == 0:
         assert prompts is not None
+        assert tokens_to_generate > 0

     context_tokens_tensor, context_length_tensor = tokenize_prompts(
-        prompts=prompts, tokens_to_generate=tokens_to_generate)
+        prompts=prompts, tokens_to_generate=tokens_to_generate, add_BOS=add_BOS)

     # Main inference function.
     # Note that the outputs are available on the first stage.
......
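For context, generate() above packs every scalar setting, including the new add_BOS flag, into a float list, broadcasts it from rank 0, and casts the values back, so all ranks run sampling with identical parameters. Below is a minimal, self-contained sketch of that broadcast pattern; it uses torch.distributed directly instead of Megatron's broadcast_float_list helper, and the function name, device choice, and process-group setup are illustrative assumptions, not the library's code.

```python
import torch
import torch.distributed as dist


def broadcast_scalar_settings(values, src=0):
    """Illustrative stand-in for Megatron's broadcast_float_list: pack scalar
    settings into a float tensor on the source rank and broadcast it so every
    rank decodes the same generation parameters. Assumes an initialized
    process group with a GPU backend such as NCCL; non-source ranks may pass
    placeholder values of the same length, since broadcast overwrites them."""
    tensor = torch.tensor(values, dtype=torch.float32,
                          device=torch.cuda.current_device())
    dist.broadcast(tensor, src)
    return tensor


# Hypothetical usage mirroring the diff: booleans survive the float
# round-trip because bool(0.0) is False and bool(1.0) is True.
# settings = broadcast_scalar_settings(
#     [tokens_to_generate, return_output_log_probs,
#      return_all_log_probs, temperature, add_BOS])
# add_BOS = bool(settings[4].item())
```

The caller, generate_and_post_process(), then detokenizes only on the first pipeline stage, which is why ranks outside that stage return None.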
@@ -57,7 +57,8 @@ def detokenize_generations(tokens_gpu_tensor,
     return tokens, prompts_plus_generations


-def tokenize_prompts(prompts=None, tokens_to_generate=None, rank=0):
+def tokenize_prompts(prompts=None, tokens_to_generate=None,
+                     add_BOS=None, rank=0):
     """Tokenize prompts and make them avaiable on all ranks."""

     # On all ranks set to None so we can pass them to functions
@@ -71,7 +72,7 @@ def tokenize_prompts(prompts=None, tokens_to_generate=None, rank=0):
         assert tokens_to_generate is not None
         # Tensor of tokens padded and their unpadded length.
         prompts_tokens_cuda_long_tensor, prompts_length_cuda_long_tensor = \
-            _tokenize_prompts_and_batch(prompts, tokens_to_generate)
+            _tokenize_prompts_and_batch(prompts, tokens_to_generate, add_BOS)
         # We need the sizes of these tensors for the boradcast
         sizes_list = [prompts_tokens_cuda_long_tensor.size(0), # Batch size
                       prompts_tokens_cuda_long_tensor.size(1)] # Sequence lenght
@@ -91,7 +92,7 @@ def tokenize_prompts(prompts=None, tokens_to_generate=None, rank=0):
     return prompts_tokens_cuda_long_tensor, prompts_length_cuda_long_tensor


-def _tokenize_prompts_and_batch(prompts, tokens_to_generate):
+def _tokenize_prompts_and_batch(prompts, tokens_to_generate, add_BOS):
     """Given a set of prompts and number of tokens to generate:
         - tokenize prompts
         - set the sequence length to be the max of length of prompts
@@ -102,6 +103,10 @@ def _tokenize_prompts_and_batch(prompts, tokens_to_generate):
     # Tokenize all the prompts.
     tokenizer = get_tokenizer()
-    prompts_tokens = [tokenizer.tokenize(prompt) for prompt in prompts]
+    if add_BOS:
+        prompts_tokens = [[tokenizer.eod] + tokenizer.tokenize(prompt)
+                          for prompt in prompts]
+    else:
+        prompts_tokens = [tokenizer.tokenize(prompt) for prompt in prompts]

     # Now we have a list of list of tokens which each list has a different
......
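To see what the add_BOS branch does end to end, here is a small, runnable sketch of the tokenize-and-pad step with a stand-in tokenizer. DummyTokenizer, its token ids, and padding with the eod id are illustrative assumptions; the real code uses Megatron's get_tokenizer() and handles padding in the elided remainder of _tokenize_prompts_and_batch.

```python
# Minimal sketch, assuming a GPT-style tokenizer where the end-of-document
# id doubles as BOS, as in the diff: [tokenizer.eod] + tokenizer.tokenize(...).
class DummyTokenizer:
    """Hypothetical stand-in for the object returned by get_tokenizer()."""
    eod = 0

    def tokenize(self, text):
        # Stand-in: map each whitespace-separated word to a small integer id.
        return [hash(word) % 100 + 1 for word in text.split()]


def tokenize_and_batch(prompts, tokens_to_generate, add_BOS,
                       tokenizer=DummyTokenizer()):
    if add_BOS:
        prompts_tokens = [[tokenizer.eod] + tokenizer.tokenize(p)
                          for p in prompts]
    else:
        prompts_tokens = [tokenizer.tokenize(p) for p in prompts]
    # Prompts have different lengths; pad every row out to the longest prompt
    # plus the room needed for generation (pad id assumed to be eod here).
    prompts_length = [len(toks) for toks in prompts_tokens]
    max_len = max(prompts_length) + tokens_to_generate
    padded = [toks + [tokenizer.eod] * (max_len - len(toks))
              for toks in prompts_tokens]
    return padded, prompts_length


# With add_BOS=True every row starts with the eod/BOS id (0 here).
batch, lengths = tokenize_and_batch(
    ["hello world", "a longer test prompt"], tokens_to_generate=3, add_BOS=True)
print(batch[0])   # starts with the BOS id and is padded to max_len
print(lengths)    # unpadded lengths, including the prepended BOS token
```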