Unverified Commit b8f1cde9 authored by Yih-Dar, committed by GitHub

Fix Mistral OOM again (#26847)



fix
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent fd6a0ade
@@ -437,6 +437,7 @@ class MistralIntegrationTest(unittest.TestCase):
         input_ids = [1, 306, 4658, 278, 6593, 310, 2834, 338]
         model = MistralForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", device_map="auto")
         input_ids = torch.tensor([input_ids]).to(model.model.embed_tokens.weight.device)
-        out = model(input_ids).logits.cpu()
+        with torch.no_grad():
+            out = model(input_ids).logits.cpu()
         # Expected mean on dim = -1
         EXPECTED_MEAN = torch.tensor([[-2.5548, -2.5737, -3.0600, -2.5906, -2.8478, -2.8118, -2.9325, -2.7694]])
@@ -457,8 +458,8 @@ class MistralIntegrationTest(unittest.TestCase):
         EXPECTED_TEXT_COMPLETION = """My favourite condiment is 100% ketchup. I love it on everything. I’m not a big"""
         prompt = "My favourite condiment is "
         tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", use_fast=False)
-        input_ids = tokenizer.encode(prompt, return_tensors="pt").to(torch_device)
         model = MistralForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", device_map="auto")
+        input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.model.embed_tokens.weight.device)
         # greedy generation outputs
         generated_ids = model.generate(input_ids, max_new_tokens=20, temperature=0)
...
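For context, the patch makes two changes to the integration tests: the forward pass now runs under torch.no_grad() so autograd does not retain activations, and input_ids is built after the model load and moved to model.model.embed_tokens.weight.device, since device_map="auto" may shard the model across devices and a global torch_device is no longer guaranteed to match the first shard. Below is a minimal sketch of the resulting pattern, mirroring the test code in the diff rather than introducing any new API; it assumes torch, transformers, and accelerate are installed and that the machine has enough memory for the 7B checkpoint.

import torch
from transformers import AutoTokenizer, MistralForCausalLM

# device_map="auto" (requires accelerate) may place different layers of the
# 7B model on different devices.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", use_fast=False)
model = MistralForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", device_map="auto")

# Inputs must live on the device holding the embedding layer (the first
# shard), which is why the second hunk moves this line after the model load.
prompt = "My favourite condiment is "
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.model.embed_tokens.weight.device)

# Wrapping the forward pass in no_grad() stops autograd from keeping
# activations alive; that is the memory the first hunk reclaims to avoid the OOM.
with torch.no_grad():
    logits = model(input_ids).logits.cpu()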