OpenDAS / Megatron-LM
Commit 2f08c0c3, authored Sep 22, 2021 by mshoeybi
added batching
Parent: 554d1cc0
1 changed file with 57 additions and 0 deletions:
megatron/inference/batching.py (new file, 0 → 100644, +57 −0)
# coding=utf-8
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Batching utilities."""

import torch

from megatron import get_tokenizer


def tokenize_prompts_and_batch(prompts, tokens_to_generate):
    """Given a set of prompts and number of tokens to generate:
        - tokenize prompts
        - set the sequence length to be the max of length of prompts
          plus the number of tokens we would like to generate
        - pad all the sequences to this length so we can convert them
          into a 2D tensor.
    """

    # Tokenize all the prompts.
    tokenizer = get_tokenizer()
    prompts_tokens = [tokenizer.tokenize(prompt) for prompt in prompts]

    # Now we have a list of lists of tokens, where each list has a different
    # size. We want to extend this list to:
    #     - incorporate the tokens that need to be generated
    #     - make all the sequences equal length.
    # Get the prompts length.
    prompts_length = [len(prompt_tokens) for prompt_tokens in prompts_tokens]
    # Get the max prompts length.
    max_prompt_len = max(prompts_length)
    # Number of tokens in each sample of the batch.
    samples_length = max_prompt_len + tokens_to_generate
    # Now update the list of lists to be of the same size: samples_length.
    for prompt_tokens, prompt_length in zip(prompts_tokens, prompts_length):
        padding_size = samples_length - prompt_length
        prompt_tokens.extend([tokenizer.eod] * padding_size)

    # Now that we are in a structured format, we can convert to tensors.
    prompts_tokens_tensor = torch.cuda.LongTensor(prompts_tokens)
    prompts_length_tensor = torch.cuda.LongTensor(prompts_length)

    return prompts_tokens_tensor, prompts_length_tensor
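The padding scheme in this commit can be exercised without Megatron or a GPU. Below is a minimal sketch, assuming a hypothetical stand-in tokenizer (`ToyTokenizer` is not part of Megatron; the real code obtains a trained tokenizer via `get_tokenizer()`) and plain Python lists in place of `torch.cuda.LongTensor`:

```python
class ToyTokenizer:
    """Hypothetical character-level tokenizer; stands in for Megatron's real one."""
    eod = 0  # end-of-document token id, used here as padding (as in the commit)

    def tokenize(self, text):
        # Map each character to a nonzero id (stand-in for real BPE tokenization).
        return [ord(c) for c in text]


def tokenize_and_pad(prompts, tokens_to_generate, tokenizer):
    """Mirror of tokenize_prompts_and_batch, minus the tensor conversion."""
    # Tokenize all the prompts; each list may have a different length.
    prompts_tokens = [tokenizer.tokenize(p) for p in prompts]
    prompts_length = [len(t) for t in prompts_tokens]
    # Every sample gets room for the longest prompt plus the generation budget.
    samples_length = max(prompts_length) + tokens_to_generate
    # Right-pad each sequence with eod up to the common length.
    for tokens, length in zip(prompts_tokens, prompts_length):
        tokens.extend([tokenizer.eod] * (samples_length - length))
    return prompts_tokens, prompts_length


tokens, lengths = tokenize_and_pad(["hi", "hello"], 3, ToyTokenizer())
# Every row is now max(2, 5) + 3 == 8 tokens long; lengths == [2, 5]
# records where each real prompt ends inside its padded row.
```

Keeping the original (unpadded) lengths alongside the padded 2D batch is what lets downstream generation code tell real prompt tokens apart from eod padding.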