OpenDAS / bitsandbytes — commit 32f8c892

Authored Apr 12, 2023 by Tim Dettmers
Parent: 7c651012

Added missing example folder.
Changes: 1 changed file, 27 additions and 0 deletions.

examples/int8_inference_huggingface.py (new file, +27 −0)
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MAX_NEW_TOKENS = 128
model_name = 'decapoda-research/llama-7b-hf'

text = 'Hamburg is in which country?\n'
tokenizer = AutoTokenizer.from_pretrained(model_name)
input_ids = tokenizer(text, return_tensors="pt").input_ids

free_in_GB = int(torch.cuda.mem_get_info()[0]/1024**3)
max_memory = f'{int(torch.cuda.mem_get_info()[0]/1024**3)-2}GB'

n_gpus = torch.cuda.device_count()
max_memory = {i: max_memory for i in range(n_gpus)}

model = AutoModelForCausalLM.from_pretrained(
  model_name,
  device_map='auto',
  load_in_8bit=True,
  max_memory=max_memory
)
generated_ids = model.generate(input_ids, max_length=MAX_NEW_TOKENS)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
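The `load_in_8bit=True` path above has bitsandbytes quantize the model's linear-layer weights to int8 with per-vector absmax scaling. As a rough illustration of that idea only — this is a minimal NumPy sketch of plain per-row absmax quantization, not bitsandbytes' actual kernel, which additionally splits out outlier columns for mixed-precision matmuls:

```python
import numpy as np

def absmax_quantize(w):
    # Per-row absmax scaling: map each row of w into the int8 range [-127, 127].
    scale = np.abs(w).max(axis=1, keepdims=True) / 127.0
    q = np.round(w / scale).astype(np.int8)
    return q, scale

def dequantize(q, scale):
    # Recover an approximation of the original weights.
    return q.astype(np.float32) * scale

rng = np.random.default_rng(0)
w = rng.standard_normal((4, 8)).astype(np.float32)
q, scale = absmax_quantize(w)
w_hat = dequantize(q, scale)
print(np.abs(w - w_hat).max())  # small round-trip error, bounded by scale / 2
```

The per-element error is at most half the row's scale, which is why absmax quantization works well until a few outlier features blow up the scale — the problem the LLM.int8() decomposition addresses.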