"cacheflow/frontend/fastapi_frontend.py" did not exist on "7a7929abe8e2fd6a4688487c471a1ee1fde0edd2"
Commit 453414da authored by rprenger

Removing unnecessary --recompute path

parent f7fe3865
@@ -189,16 +189,6 @@ def sample_sequence_batch(model, context_tokens, context_lengths,
     lengths = torch.ones([batch_size]).long().cuda() * maxlen
     while context_length <= (maxlen):
-        if args.recompute:
-            output = forward_step(model, tokens,
-                                  position_ids,
-                                  attention_mask,
-                                  tokentype_ids=type_ids,
-                                  forward_method_parallel_output=False)
-            if mpu.is_pipeline_last_stage():
-                assert output is not None
-                logits = output[:, context_length - 1, :]
-        else:
         types2use = None
         if counter == 0:
             tokens2use = tokens[:, :context_length]
...
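The removed branch implemented brute-force generation: at every step it re-ran the forward pass over the entire token prefix and sliced out the logits at position context_length - 1, whereas the surviving path feeds only the not-yet-processed tokens and reuses previously computed keys/values. A toy single-head attention sketch (illustrative only; the helper names and shapes are assumptions, not code from this repository) shows why both paths yield identical outputs while the cached path avoids redundant work per step:

import torch

def attention(q, k, v):
    # q: [1, d]; k, v: [t, d] -> softmax-weighted sum over the t positions.
    scores = q @ k.t() / k.shape[-1] ** 0.5
    return torch.softmax(scores, dim=-1) @ v

torch.manual_seed(0)
d = 8
wq, wk, wv = (torch.randn(d, d) for _ in range(3))
xs = torch.randn(5, d)            # embeddings for a 5-token prefix

# "--recompute" style: rebuild K/V for the whole prefix at every step,
# then read off the attention output for the newest position only.
k_full, v_full = xs @ wk, xs @ wv
out_recompute = attention(xs[-1:] @ wq, k_full, v_full)

# Cached style: K/V rows for earlier tokens were stored on previous steps;
# each new step appends one row and attends against the cache.
k_cache, v_cache = xs[:-1] @ wk, xs[:-1] @ wv     # built incrementally in practice
k_cache = torch.cat([k_cache, xs[-1:] @ wk])
v_cache = torch.cat([v_cache, xs[-1:] @ wv])
out_cached = attention(xs[-1:] @ wq, k_cache, v_cache)

# Identical results; the cached path projects one new row per step
# instead of re-projecting the full prefix.
assert torch.allclose(out_recompute, out_cached, atol=1e-5)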
@@ -55,10 +55,6 @@ def add_text_generate_args(parser):
                        help='Top k sampling.')
     group.add_argument("--out-seq-length", type=int, default=1024,
                        help='Size of the output generated text.')
-    group.add_argument("--recompute", action='store_true',
-                       help='During generation recompute all attention '
-                            'instead of using previously computed keys/values.')
     return parser
...
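With the flag gone, argparse will reject any invocation that still passes --recompute with an "unrecognized arguments" error; callers should simply drop the flag, since the cached key/value path is now the only behavior and produces the same results.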