Unverified Commit 3cd3921f authored by Hailey Schoelkopf, committed by GitHub

Merge pull request #684 from baberabb/big-refactor_output

edited output_path and added help to args
parents 4dfa8aba decb55cd
@@ -5,6 +5,7 @@ import fnmatch
 import jsonlines
 import argparse
 import logging
+from pathlib import Path
 from lm_eval import evaluator, utils
 from lm_eval.api.registry import ALL_TASKS
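The added `pathlib` import underpins the output-path rework in the hunks below. For orientation, a quick sketch (not part of the diff) of how the `os.path` idioms removed later in this diff map onto their `pathlib` replacements:

```python
import os
from pathlib import Path

p = "out/run1/results.json"  # illustrative path, not from the PR

# os.path style (removed below)  ==  pathlib style (added below)
assert os.path.dirname(p) == str(Path(p).parent)
assert os.path.isfile(p) == Path(p).is_file()

# os.makedirs(os.path.dirname(p), exist_ok=True) becomes:
Path(p).parent.mkdir(parents=True, exist_ok=True)
```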
@@ -15,22 +16,41 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 def parse_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--model", required=True)
-    parser.add_argument("--model_args", default="")
+    parser.add_argument("--model", required=True, help="Name of model e.g. `hf`")
+    parser.add_argument(
+        "--model_args",
+        default="",
+        help="String arguments for model, e.g. `pretrained=EleutherAI/pythia-160m,dtype=float32`",
+    )
     parser.add_argument(
         "--tasks", default=None, choices=utils.MultiChoice(sorted(ALL_TASKS))
     )
     parser.add_argument("--config", default=None)
-    parser.add_argument("--num_fewshot", type=int, default=0)
-    parser.add_argument("--batch_size", type=int, default=1)
+    parser.add_argument(
+        "--num_fewshot",
+        type=int,
+        default=0,
+        help="Number of examples in few-shot context",
+    )
+    parser.add_argument("--batch_size", type=int, default=1)  # TODO: only integers
     parser.add_argument(
         "--max_batch_size",
         type=int,
         default=None,
         help="Maximal batch size to try with --batch_size auto",
     )
-    parser.add_argument("--device", type=str, default=None)
-    parser.add_argument("--output_path", default=None)
+    parser.add_argument(
+        "--device",
+        type=str,
+        default=None,
+        help="Device to use (e.g. cuda, cuda:0, cpu)",
+    )
+    parser.add_argument(
+        "--output_path",
+        default=None,
+        type=str,
+        metavar="= [dir/file.jsonl] [DIR]",
+        help="The path to the output file where the result metrics will be saved. If the path is a directory and log_samples is true, the results will be saved in the directory. Else the parent directory will be used.",
+    )
     parser.add_argument(
         "--limit",
         type=float,
@@ -38,12 +58,30 @@ def parse_args():
         help="Limit the number of examples per task. "
         "If <1, limit is a percentage of the total number of examples.",
     )
-    parser.add_argument("--data_sampling", type=float, default=None)
-    parser.add_argument("--use_cache", type=str, default=None)
-    parser.add_argument("--decontamination_ngrams_path", default=None)
-    parser.add_argument("--check_integrity", action="store_true")
-    parser.add_argument("--write_out", action="store_true", default=False)
-    parser.add_argument("--log_samples", action="store_true", default=False)
+    parser.add_argument(
+        "--use_cache",
+        type=str,
+        default=None,
+        help="A path to a sqlite db file for caching model responses. `None` if not caching.",
+    )
+    parser.add_argument("--decontamination_ngrams_path", default=None)  # TODO: not used
+    parser.add_argument(
+        "--check_integrity",
+        action="store_true",
+        help="Whether to run the relevant part of the test suite for the tasks",
+    )
+    parser.add_argument(
+        "--write_out",
+        action="store_true",
+        default=False,
+        help="Prints the prompt for the first few documents",
+    )
+    parser.add_argument(
+        "--log_samples",
+        action="store_true",
+        default=False,
+        help="If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis",
+    )
     return parser.parse_args()
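For orientation, a minimal, self-contained sketch of the reworked interface. The flags and help strings are copied from the hunk above; the sample values (`hf`, `output/pythia-160m`) are illustrative only, not from the diff:

```python
import argparse

# Pared-down copy of the parser above, just to show the new flags in use.
parser = argparse.ArgumentParser()
parser.add_argument("--model", required=True, help="Name of model e.g. `hf`")
parser.add_argument(
    "--model_args",
    default="",
    help="String arguments for model, e.g. `pretrained=EleutherAI/pythia-160m,dtype=float32`",
)
parser.add_argument("--output_path", default=None, type=str)

# Mirrors a command line such as:
#   python main.py --model hf \
#       --model_args pretrained=EleutherAI/pythia-160m,dtype=float32 \
#       --output_path output/pythia-160m
args = parser.parse_args(
    [
        "--model", "hf",
        "--model_args", "pretrained=EleutherAI/pythia-160m,dtype=float32",
        "--output_path", "output/pythia-160m",
    ]
)
print(args.model, args.model_args, args.output_path)
```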
@@ -75,6 +113,25 @@ def main():
             config = utils.load_yaml_config(task)
             task_names.append(config)
+    if args.output_path:
+        path = Path(args.output_path)
+        # check if file or 'dir/results.jsonl' exists
+        if path.is_file() or Path(args.output_path).joinpath("results.jsonl").is_file():
+            eval_logger.warning(
+                f"File already exists at {path}. Results will be overwritten."
+            )
+            assert not path.is_file(), "File already exists"
+        # if path json then get parent dir
+        elif path.suffix in (".json", ".jsonl"):
+            output_path_file = path
+            path.parent.mkdir(parents=True, exist_ok=True)
+            path = path.parent
+        else:
+            path.mkdir(parents=True, exist_ok=True)
+            output_path_file = path.joinpath("results.json")
+    elif args.log_samples and not args.output_path:
+        assert args.output_path, "Specify --output_path"
     eval_logger.info(f"Selected Tasks: {task_names}")
     results = evaluator.simple_evaluate(
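The new block above resolves `--output_path` into a directory for per-task sample files plus a file for the aggregate metrics. A standalone sketch of that two-way split, leaving aside the overwrite warning/assert (the helper name `resolve_output_path` is hypothetical, not from the PR):

```python
from pathlib import Path


def resolve_output_path(output_path: str) -> tuple[Path, Path]:
    """Hypothetical helper mirroring the branch logic in the hunk above.

    Returns (samples_dir, results_file).
    """
    path = Path(output_path)
    if path.suffix in (".json", ".jsonl"):
        # e.g. "out/run1/results.json": results file named explicitly,
        # per-task sample files go next to it in "out/run1"
        path.parent.mkdir(parents=True, exist_ok=True)
        return path.parent, path
    # e.g. "out/run1": treated as a directory,
    # aggregate metrics default to "out/run1/results.json"
    path.mkdir(parents=True, exist_ok=True)
    return path, path.joinpath("results.json")


print(resolve_output_path("out/run1"))
# on POSIX: (PosixPath('out/run1'), PosixPath('out/run1/results.json'))
```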
@@ -102,22 +159,14 @@ def main():
     batch_sizes = ",".join(map(str, results["config"]["batch_sizes"]))
     if args.output_path:
-        os.makedirs(os.path.dirname(args.output_path), exist_ok=True)
-        with open(args.output_path, "w") as f:
-            f.write(dumped)
+        output_path_file.open("w").write(dumped)
         if args.log_samples:
             for task_name, config in results["configs"].items():
                 output_name = "{}_{}".format(
                     re.sub("/", "__", args.model_args), task_name
                 )
-                if os.path.isdir(args.output_path):
-                    filename = f"./{args.output_path}/{output_name}.jsonl"
-                elif os.path.isfile(args.output_path):
-                    filename = (
-                        f"./{os.path.dirname(args.output_path)}/{output_name}.jsonl"
-                    )
+                filename = path.joinpath(f"{output_name}.jsonl")
                 with jsonlines.open(filename, "w") as f:
                     f.write_all(samples[task_name])
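A side note on the write in the final hunk: `output_path_file.open("w").write(dumped)` leaves closing the file handle to the garbage collector. A behaviorally equivalent sketch that scopes the handle explicitly, as the replaced `with open(...)` form did (the path and payload below are placeholders for values set earlier in `main()`):

```python
from pathlib import Path

output_path_file = Path("out/run1/results.json")  # placeholder; set by main()
dumped = "{}"  # placeholder for the serialized results string

output_path_file.parent.mkdir(parents=True, exist_ok=True)
with output_path_file.open("w") as f:  # handle is closed deterministically
    f.write(dumped)
```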