Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
8910c72f
Commit
8910c72f
authored
Nov 16, 2023
by
Alexander
Browse files
Fixed issues
parent
a702689d
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
12 additions
and
25 deletions
+12
-25
lm_eval/evaluator.py
lm_eval/evaluator.py
+0
-5
lm_eval/models/__init__.py
lm_eval/models/__init__.py
+2
-1
lm_eval/models/gpt2.py
lm_eval/models/gpt2.py
+9
-18
main.py
main.py
+0
-1
setup.py
setup.py
+1
-0
No files found.
lm_eval/evaluator.py
View file @
8910c72f
...
...
@@ -28,7 +28,6 @@ def simple_evaluate(
description_dict
=
None
,
check_integrity
=
False
,
decontamination_ngrams_path
=
None
,
tokenizer
=
None
,
write_out
=
False
,
output_base_path
=
None
,
):
...
...
@@ -75,9 +74,6 @@ def simple_evaluate(
if
model_args
is
None
:
model_args
=
""
lm
=
lm_eval
.
models
.
get_model
(
model
).
create_from_arg_string
(
<<<<<<<
HEAD
model_args
,
{
"batch_size"
:
batch_size
,
"device"
:
device
,
"tokenizer"
:
tokenizer
,
"trust_remote_code"
:
True
}
=======
model_args
,
{
"batch_size"
:
batch_size
,
...
...
@@ -90,7 +86,6 @@ def simple_evaluate(
pretrained
=
model
,
batch_size
=
batch_size
,
max_batch_size
=
max_batch_size
,
>>>>>>>
origin
/
master
)
no_cache
=
True
else
:
...
...
lm_eval/models/__init__.py
View file @
8910c72f
...
...
@@ -16,7 +16,8 @@ MODEL_REGISTRY = {
"anthropic"
:
anthropic_llms
.
AnthropicLM
,
"textsynth"
:
textsynth
.
TextSynthLM
,
"dummy"
:
dummy
.
DummyLM
,
"gguf"
:
gguf
.
GGUFLM
"gguf"
:
gguf
.
GGUFLM
,
"optimum-causal"
:
gpt2
.
OPTIMUMLM
,
}
...
...
lm_eval/models/gpt2.py
View file @
8910c72f
...
...
@@ -215,32 +215,23 @@ class OPTIMUMLM(BaseLM):
self
.
gpt2
=
OVModelForCausalLM
.
from_pretrained
(
pretrained
,
# load_in_8bit=load_in_8bit,
# low_cpu_mem_usage=low_cpu_mem_usage,
load_in_8bit
=
load_in_8bit
,
revision
=
revision
,
trust_remote_code
=
trust_remote_code
,
use_cache
=
True
,
)
#self.gpt2.eval()
self
.
tokenizer
=
transformers
.
AutoTokenizer
.
from_pretrained
(
pretrained
if
tokenizer
is
None
else
tokenizer
,
revision
=
revision
,
trust_remote_code
=
trust_remote_code
,
)
try
:
self
.
tokenizer
=
transformers
.
AutoTokenizer
.
from_pretrained
(
pretrained
if
tokenizer
is
None
else
tokenizer
,
revision
=
revision
,
trust_remote_code
=
trust_remote_code
,
)
except
:
print
(
"Tokenizer is missing. Please save it into the same folder as the model."
)
self
.
vocab_size
=
self
.
tokenizer
.
vocab_size
# if isinstance(
# self.tokenizer, (transformers.GPT2Tokenizer, transformers.GPT2TokenizerFast)
# ):
# assert self.tokenizer.encode("hello\n\nhello") == [
# 31373,
# 198,
# 198,
# 31373,
# ], self.tokenizer.encode("hello\n\nhello")
# setup for automatic batch size detection
if
batch_size
==
'auto'
:
self
.
batch_size_per_gpu
=
batch_size
...
...
main.py
View file @
8910c72f
...
...
@@ -79,7 +79,6 @@ def main():
description_dict
=
description_dict
,
decontamination_ngrams_path
=
args
.
decontamination_ngrams_path
,
check_integrity
=
args
.
check_integrity
,
tokenizer
=
args
.
tokenizer
,
write_out
=
args
.
write_out
,
output_base_path
=
args
.
output_base_path
,
)
...
...
setup.py
View file @
8910c72f
...
...
@@ -49,5 +49,6 @@ setuptools.setup(
"sentencepiece"
:
[
"sentencepiece>=0.1.98"
,
"protobuf>=4.22.1"
],
"auto-gptq"
:
[
"auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"
],
"anthropic"
:
[
"anthropic"
],
"openvino"
:
[
"openvino"
,
"nncf"
,
"onnx"
,
"optimum-intel @ git+https://github.com/huggingface/optimum-intel.git"
],
},
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment