Unverified Commit f7f4b674 authored by andreyanufr, committed by GitHub

Avoid creating model_cache for OVModelForCausalLM

parent b73850b0
@@ -214,12 +214,15 @@ class OPTIMUMLM(BaseLM):
# TODO: update this to be less of a hack once subfolder is fixed in HF
revision = revision + ("/" + subfolder if subfolder is not None else "")
ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}
self.gpt2 = OVModelForCausalLM.from_pretrained(
pretrained,
load_in_8bit=load_in_8bit,
revision=revision,
trust_remote_code=trust_remote_code,
use_cache=True,
ov_config=ov_config
)
try:
......
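
For reference, a minimal standalone sketch of the same idea, assuming optimum-intel with an OpenVINO backend; the "gpt2" model id and the export flag are illustrative placeholders and not part of this commit:

from optimum.intel import OVModelForCausalLM

# An empty CACHE_DIR tells the OpenVINO runtime not to serialize compiled
# blobs to disk, so no model_cache directory appears next to the model files.
ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}

model = OVModelForCausalLM.from_pretrained(
    "gpt2",               # illustrative checkpoint, not from the commit
    export=True,          # convert to OpenVINO IR if the checkpoint is not already converted
    use_cache=True,       # keep the KV cache for faster autoregressive decoding
    ov_config=ov_config,  # forwarded to the OpenVINO compile/load step
)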