"cacheflow/frontend/fastapi_frontend.py" did not exist on "7a7929abe8e2fd6a4688487c471a1ee1fde0edd2"
Commit 453414da authored by rprenger

Removing unnecessary --recompute path

parent f7fe3865
@@ -189,16 +189,6 @@ def sample_sequence_batch(model, context_tokens, context_lengths,
     lengths = torch.ones([batch_size]).long().cuda() * maxlen
     while context_length <= (maxlen):
-        if args.recompute:
-            output = forward_step(model, tokens,
-                                  position_ids,
-                                  attention_mask,
-                                  tokentype_ids=type_ids,
-                                  forward_method_parallel_output=False)
-            if mpu.is_pipeline_last_stage():
-                assert output is not None
-                logits = output[:, context_length - 1, :]
-        else:
         types2use = None
         if counter == 0:
             tokens2use = tokens[:, :context_length]
...
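The removed branch implemented brute-force generation: at every step it re-ran the forward pass over the entire token prefix and sliced out the logits at position context_length - 1, whereas the surviving path feeds only the not-yet-processed tokens and reuses previously computed keys/values. A toy single-head attention sketch (illustrative only; the helper names and shapes are assumptions, not code from this repository) shows why both paths yield identical outputs while the cached path avoids redundant work per step:

import torch

def attention(q, k, v):
    # q: [1, d]; k, v: [t, d] -> softmax-weighted sum over the t positions.
    scores = q @ k.t() / k.shape[-1] ** 0.5
    return torch.softmax(scores, dim=-1) @ v

torch.manual_seed(0)
d = 8
wq, wk, wv = (torch.randn(d, d) for _ in range(3))
xs = torch.randn(5, d)            # embeddings for a 5-token prefix

# "--recompute" style: rebuild K/V for the whole prefix at every step,
# then read off the attention output for the newest position only.
k_full, v_full = xs @ wk, xs @ wv
out_recompute = attention(xs[-1:] @ wq, k_full, v_full)

# Cached style: K/V rows for earlier tokens were stored on previous steps;
# each new step appends one row and attends against the cache.
k_cache, v_cache = xs[:-1] @ wk, xs[:-1] @ wv     # built incrementally in practice
k_cache = torch.cat([k_cache, xs[-1:] @ wk])
v_cache = torch.cat([v_cache, xs[-1:] @ wv])
out_cached = attention(xs[-1:] @ wq, k_cache, v_cache)

# Identical results; the cached path projects one new row per step
# instead of re-projecting the full prefix.
assert torch.allclose(out_recompute, out_cached, atol=1e-5)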
@@ -55,10 +55,6 @@ def add_text_generate_args(parser):
                        help='Top k sampling.')
     group.add_argument("--out-seq-length", type=int, default=1024,
                        help='Size of the output generated text.')
-    group.add_argument("--recompute", action='store_true',
-                       help='During generation recompute all attention '
-                            'instead of using previously computed keys/values.')
     return parser
...
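With the flag gone, argparse will reject any invocation that still passes --recompute with an "unrecognized arguments" error; callers should simply drop the flag, since the cached key/value path is now the only behavior and produces the same results.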