Commit 8d608117 authored by lintangsutawika

added logging process

parent 4923a7ce
-import collections
+import random
 import itertools
+import collections
 import numpy as np
-import random
+import lm_eval.api
 import lm_eval.api.metrics
-import lm_eval.models
 import lm_eval.tasks
-import lm_eval.api
+import lm_eval.models
 from lm_eval.utils import positional_deprecated, run_task_tests, make_table, get_git_commit_hash
+from lm_eval.logger import eval_logger
 @positional_deprecated
 def simple_evaluate(
@@ -152,7 +158,7 @@ def evaluate(
     ### Run LM on inputs, get all outputs ###
     # execute each type of request
     for reqtype, reqs in requests.items():
-        print("Running", reqtype, "requests")
+        eval_logger.info("Running {} requests".format(reqtype))
         # create `K` copies of each request `req` based off `K = req.repeats`
         cloned_reqs = []
         for req in reqs:
......
+import logging
+logging.basicConfig(
+    format='%(asctime)s,%(msecs)03d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
+    datefmt='%Y-%m-%d:%H:%M:%S',
+    level=logging.INFO
+)
+eval_logger = logging.getLogger("lm-eval")
\ No newline at end of file
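For reference, a minimal sketch of how the new eval_logger module is consumed by the other files in this commit; the output line shown is only illustrative of the basicConfig format above (the timestamp, filename, and line number are made up):

from lm_eval.logger import eval_logger

eval_logger.info("Running {} requests".format("loglikelihood"))
# illustrative output given the format string above (values are made up):
# 2023-04-01:12:00:00,000 INFO     [evaluator.py:161] Running loglikelihood requests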
@@ -6,6 +6,7 @@ from tqdm import tqdm
 import torch.nn.functional as F
 from lm_eval import utils
+from lm_eval.logger import eval_logger
 from lm_eval.api.model import LM, register_model
@@ -31,10 +32,10 @@ class HFLM(LM):
             if device not in ["cuda", "cpu"]:
                 device = int(device)
             self._device = torch.device(device)
-            print(f"Using device '{device}'")
+            eval_logger.info(f"Using device '{device}'")
         else:
-            print("Device not specified")
+            eval_logger.warning("Device not specified")
-            print(f"Cuda Available? {torch.cuda.is_available()}")
+            eval_logger.info(f"Cuda Available? {torch.cuda.is_available()}")
             self._device = (
                 torch.device("cuda")
                 if torch.cuda.is_available()
......
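A standalone paraphrase of the device-selection branch above, useful for reading the values that now get logged; resolve_device is a hypothetical helper name for this sketch, not part of the harness:

import torch

def resolve_device(device=None):
    # hypothetical helper mirroring the HFLM.__init__ branch above
    if device:
        if device not in ["cuda", "cpu"]:
            device = int(device)  # e.g. "0" -> 0, and torch.device(0) means cuda:0
        return torch.device(device)
    # no device given: prefer CUDA when available, otherwise CPU
    return torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")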
 import os
-import numpy as np
+import time
 import transformers
-from lm_eval.api.model import LM, register_model
-from lm_eval import utils
+import numpy as np
 from tqdm import tqdm
-import time
+from lm_eval import utils
+from lm_eval.api.model import LM, register_model
 def get_result(response, ctxlen):
......
@@ -4,6 +4,7 @@ from typing import List, Union
 from .arc import *
 from lm_eval import utils
+from lm_eval.logger import eval_logger
 from lm_eval.api.task import TaskConfig, Task, ConfigurableTask
 from lm_eval.api.register import (
     register_task,
@@ -53,8 +54,8 @@ def get_task(task_name, config):
     try:
         return TASK_REGISTRY[task_name](config)
     except KeyError:
-        print("Available tasks:")
+        eval_logger.info("Available tasks:")
-        pprint(TASK_REGISTRY)
+        eval_logger.info(TASK_REGISTRY)
         raise KeyError(f"Missing task {task_name}")
......
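One detail about the hunk above: logging.Logger.info accepts a non-string message and renders it with str(), so eval_logger.info(TASK_REGISTRY) emits the whole registry as a single flat repr. A hedged sketch of how the same information could be pretty-printed (this is only an illustration, not what the diff does; pformat does appear among the new imports in main.py below):

from pprint import pformat

# str(TASK_REGISTRY) lands on one log line; pformat spreads the mapping
# over multiple lines for readability
eval_logger.info("Available tasks:\n%s", pformat(TASK_REGISTRY))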
-import argparse
+import os
+import yaml
 import json
-import logging
 import fnmatch
-import yaml
+import warnings
-import os
+import argparse
+from pprint import pformat
 from lm_eval import evaluator, utils
 from lm_eval.tasks import ALL_TASKS
+from lm_eval.logger import eval_logger
-logging.getLogger("openai").setLevel(logging.WARNING)
 os.environ['TOKENIZERS_PARALLELISM'] = 'false'
 class MultiChoice:
     def __init__(self, choices):
         self.choices = choices
-        print(f"{ALL_TASKS} is this")
     # Simple wildcard support (linux filename patterns)
     def __contains__(self, values):
         for value in values.split(","):
             if len(fnmatch.filter(self.choices, value)) == 0:
-                return False
+                eval_logger.warning("{} is not in task list.".format(value))
+                # eval_logger.info(f"{ALL_TASKS} is this")
         return True
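A toy illustration of how the change to __contains__ above alters behavior (the task names here are placeholders, not the real task list): an unmatched pattern used to make the membership check fail, which argparse would reject; now it only logs a warning and the check still passes.

mc = MultiChoice(["arc_easy", "arc_challenge"])  # placeholder choices
result = "arc_easy,boolq" in mc
# before this commit: False (the unmatched "boolq" short-circuits the check)
# after this commit:  True, with "boolq is not in task list." logged as a warning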
@@ -45,7 +47,6 @@ def parse_args():
     parser.add_argument("--decontamination_ngrams_path", default=None)
     parser.add_argument("--description_dict_path", default=None)
     parser.add_argument("--check_integrity", action="store_true")
     return parser.parse_args()
@@ -63,8 +64,9 @@ def main():
     args = parse_args()
     if args.limit:
-        print(
-            "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
+        eval_logger.warning(
+            " --limit SHOULD ONLY BE USED FOR TESTING."
+            "REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
         )
     if args.tasks != None:
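Small aside on the warning above: Python joins adjacent string literals with no separator, so the two lines are emitted as one message. The snippet below only demonstrates that behavior.

msg = (
    " --limit SHOULD ONLY BE USED FOR TESTING."
    "REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
)
# msg == " --limit SHOULD ONLY BE USED FOR TESTING.REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
# note there is no space between "TESTING." and "REAL"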
@@ -73,14 +75,14 @@ def main():
             task_names = []
             yaml_path = os.path.join(args.tasks, "*.yaml")
             for yaml_file in glob.glob(yaml_path):
-                config = yaml.full_load(yaml_file)
+                config = utils.load_yaml_config(yaml_file)
                 task_names.append(config)
         else:
             tasks_list = args.tasks.split(",")
             task_names = pattern_match(tasks_list, ALL_TASKS)
             for task in [task for task in tasks_list if task not in task_names]:
                 if os.path.isfile(task):
-                    config = utils.get_yaml_config(task)
+                    config = utils.load_yaml_config(task)
                     task_names.append(config)
     # # Tas
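The first change in this hunk also fixes a bug: glob.glob yields path strings, so yaml.full_load(yaml_file) was parsing the path itself rather than the file contents. utils.load_yaml_config is not shown in this diff; a hypothetical sketch of what such a helper would need to do:

import yaml

def load_yaml_config(yaml_path):
    # hypothetical stand-in for utils.load_yaml_config (not shown in this commit):
    # open the file and parse its contents instead of treating the path as YAML
    with open(yaml_path, "r") as f:
        return yaml.full_load(f)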
@@ -100,8 +102,7 @@ def main():
     #     task_names = ALL_TASKS
     # else:
-    print(f"Selected Tasks: {task_names}")
+    eval_logger.info(f"Selected Tasks: {task_names}")
     results = evaluator.simple_evaluate(
         model=args.model,
......