Unverified Commit f7f4b674 authored by andreyanufr, committed by GitHub

Avoid creating model_cache for OVModelForCausalLM

parent b73850b0
@@ -214,12 +214,15 @@ class OPTIMUMLM(BaseLM):
# TODO: update this to be less of a hack once subfolder is fixed in HF
revision = revision + ("/" + subfolder if subfolder is not None else "")
ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}
self.gpt2 = OVModelForCausalLM.from_pretrained(
pretrained,
load_in_8bit=load_in_8bit,
revision=revision,
trust_remote_code=trust_remote_code,
use_cache=True,
ov_config=ov_config
)
try:
......
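
For reference, a minimal standalone sketch of the same idea, assuming optimum-intel with an OpenVINO backend; the "gpt2" model id and the export flag are illustrative placeholders and not part of this commit:

from optimum.intel import OVModelForCausalLM

# An empty CACHE_DIR tells the OpenVINO runtime not to serialize compiled
# blobs to disk, so no model_cache directory appears next to the model files.
ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}

model = OVModelForCausalLM.from_pretrained(
    "gpt2",               # illustrative checkpoint, not from the commit
    export=True,          # convert to OpenVINO IR if the checkpoint is not already converted
    use_cache=True,       # keep the KV cache for faster autoregressive decoding
    ov_config=ov_config,  # forwarded to the OpenVINO compile/load step
)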