Unverified commit 9f34c1b8 authored by UncleCode, committed by GitHub

Fix typo in INFERENCE.md: change return_tensors and correct the usage of device (#98)

parent 6185106e
@@ -62,7 +62,7 @@ model.generation_config.cache_implementation = "static"
 model.forward = torch.compile(model.forward, mode=compile_mode)
 # warmup
-inputs = tokenizer("This is for compilation", return_tensors="pt", padding="max_length", max_length=max_length).to(device)
+inputs = tokenizer("This is for compilation", return_tensors="pt", padding="max_length", max_length=max_length).to(torch_device)
 model_kwargs = {**inputs, "prompt_input_ids": inputs.input_ids, "prompt_attention_mask": inputs.attention_mask, }
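The fix matters because the warmup tensors must live on the same device as the compiled model: the surrounding document defines `torch_device`, while `device` was an undefined name. A minimal sketch of the compile-then-warmup pattern, using a hypothetical toy module rather than the model and tokenizer from the document:

```python
import torch

# Pick the device once and reuse the same variable everywhere,
# as the corrected documentation does with torch_device.
torch_device = "cuda" if torch.cuda.is_available() else "cpu"

class ToyModel(torch.nn.Module):
    # Stand-in for the real model; any nn.Module works for the pattern.
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(8, 8)

    def forward(self, x):
        return self.linear(x)

model = ToyModel().to(torch_device)
# Compile the forward pass; the first call triggers compilation,
# which is why a throwaway warmup call is done up front.
model.forward = torch.compile(model.forward)

# Warmup inputs must be moved to the same device as the model,
# otherwise the call fails with a device-mismatch error.
inputs = torch.randn(1, 8).to(torch_device)
with torch.no_grad():
    out = model(inputs)
print(out.shape)  # torch.Size([1, 8])
```

Later calls with same-shaped inputs reuse the compiled graph, which is the point of doing the dummy "This is for compilation" pass in the original snippet.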