Unverified Commit 3cd3921f authored by Hailey Schoelkopf, committed by GitHub

Merge pull request #684 from baberabb/big-refactor_output

edited output_path and added help to args
parents 4dfa8aba decb55cd
@@ -5,6 +5,7 @@ import fnmatch
 import jsonlines
 import argparse
 import logging
+from pathlib import Path
 from lm_eval import evaluator, utils
 from lm_eval.api.registry import ALL_TASKS
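The added `pathlib` import underpins the output-path rework in the hunks below. For orientation, a quick sketch (not part of the diff) of how the `os.path` idioms removed later in this diff map onto their `pathlib` replacements:

```python
import os
from pathlib import Path

p = "out/run1/results.json"  # illustrative path, not from the PR

# os.path style (removed below)  ==  pathlib style (added below)
assert os.path.dirname(p) == str(Path(p).parent)
assert os.path.isfile(p) == Path(p).is_file()

# os.makedirs(os.path.dirname(p), exist_ok=True) becomes:
Path(p).parent.mkdir(parents=True, exist_ok=True)
```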
@@ -15,22 +16,41 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 def parse_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument("--model", required=True)
-    parser.add_argument("--model_args", default="")
+    parser.add_argument("--model", required=True, help="Name of model e.g. `hf`")
+    parser.add_argument(
+        "--model_args",
+        default="",
+        help="String arguments for model, e.g. `pretrained=EleutherAI/pythia-160m,dtype=float32`",
+    )
     parser.add_argument(
         "--tasks", default=None, choices=utils.MultiChoice(sorted(ALL_TASKS))
     )
     parser.add_argument("--config", default=None)
-    parser.add_argument("--num_fewshot", type=int, default=0)
-    parser.add_argument("--batch_size", type=int, default=1)
+    parser.add_argument(
+        "--num_fewshot",
+        type=int,
+        default=0,
+        help="Number of examples in few-shot context",
+    )
+    parser.add_argument("--batch_size", type=int, default=1)  # TODO: only integers
     parser.add_argument(
         "--max_batch_size",
         type=int,
         default=None,
         help="Maximal batch size to try with --batch_size auto",
     )
-    parser.add_argument("--device", type=str, default=None)
-    parser.add_argument("--output_path", default=None)
+    parser.add_argument(
+        "--device",
+        type=str,
+        default=None,
+        help="Device to use (e.g. cuda, cuda:0, cpu)",
+    )
+    parser.add_argument(
+        "--output_path",
+        default=None,
+        type=str,
+        metavar="= [dir/file.jsonl] [DIR]",
+        help="The path to the output file where the result metrics will be saved. If the path is a directory and log_samples is true, the results will be saved in the directory. Else the parent directory will be used.",
+    )
     parser.add_argument(
         "--limit",
         type=float,
@@ -38,12 +58,30 @@ def parse_args():
         help="Limit the number of examples per task. "
         "If <1, limit is a percentage of the total number of examples.",
     )
-    parser.add_argument("--data_sampling", type=float, default=None)
-    parser.add_argument("--use_cache", type=str, default=None)
-    parser.add_argument("--decontamination_ngrams_path", default=None)
-    parser.add_argument("--check_integrity", action="store_true")
-    parser.add_argument("--write_out", action="store_true", default=False)
-    parser.add_argument("--log_samples", action="store_true", default=False)
+    parser.add_argument(
+        "--use_cache",
+        type=str,
+        default=None,
+        help="A path to a sqlite db file for caching model responses. `None` if not caching.",
+    )
+    parser.add_argument("--decontamination_ngrams_path", default=None)  # TODO: not used
+    parser.add_argument(
+        "--check_integrity",
+        action="store_true",
+        help="Whether to run the relevant part of the test suite for the tasks",
+    )
+    parser.add_argument(
+        "--write_out",
+        action="store_true",
+        default=False,
+        help="Prints the prompt for the first few documents",
+    )
+    parser.add_argument(
+        "--log_samples",
+        action="store_true",
+        default=False,
+        help="If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis",
+    )
     return parser.parse_args()
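For orientation, a minimal, self-contained sketch of the reworked interface. The flags and help strings are copied from the hunk above; the sample values (`hf`, `output/pythia-160m`) are illustrative only, not from the diff:

```python
import argparse

# Pared-down copy of the parser above, just to show the new flags in use.
parser = argparse.ArgumentParser()
parser.add_argument("--model", required=True, help="Name of model e.g. `hf`")
parser.add_argument(
    "--model_args",
    default="",
    help="String arguments for model, e.g. `pretrained=EleutherAI/pythia-160m,dtype=float32`",
)
parser.add_argument("--output_path", default=None, type=str)

# Mirrors a command line such as:
#   python main.py --model hf \
#       --model_args pretrained=EleutherAI/pythia-160m,dtype=float32 \
#       --output_path output/pythia-160m
args = parser.parse_args(
    [
        "--model", "hf",
        "--model_args", "pretrained=EleutherAI/pythia-160m,dtype=float32",
        "--output_path", "output/pythia-160m",
    ]
)
print(args.model, args.model_args, args.output_path)
```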
@@ -75,6 +113,25 @@ def main():
             config = utils.load_yaml_config(task)
             task_names.append(config)
+    if args.output_path:
+        path = Path(args.output_path)
+        # check if file or 'dir/results.jsonl' exists
+        if path.is_file() or Path(args.output_path).joinpath("results.jsonl").is_file():
+            eval_logger.warning(
+                f"File already exists at {path}. Results will be overwritten."
+            )
+            assert not path.is_file(), "File already exists"
+        # if path json then get parent dir
+        elif path.suffix in (".json", ".jsonl"):
+            output_path_file = path
+            path.parent.mkdir(parents=True, exist_ok=True)
+            path = path.parent
+        else:
+            path.mkdir(parents=True, exist_ok=True)
+            output_path_file = path.joinpath("results.json")
+    elif args.log_samples and not args.output_path:
+        assert args.output_path, "Specify --output_path"
     eval_logger.info(f"Selected Tasks: {task_names}")
     results = evaluator.simple_evaluate(
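The new block above resolves `--output_path` into a directory for per-task sample files plus a file for the aggregate metrics. A standalone sketch of that two-way split, leaving aside the overwrite warning/assert (the helper name `resolve_output_path` is hypothetical, not from the PR):

```python
from pathlib import Path


def resolve_output_path(output_path: str) -> tuple[Path, Path]:
    """Hypothetical helper mirroring the branch logic in the hunk above.

    Returns (samples_dir, results_file).
    """
    path = Path(output_path)
    if path.suffix in (".json", ".jsonl"):
        # e.g. "out/run1/results.json": results file named explicitly,
        # per-task sample files go next to it in "out/run1"
        path.parent.mkdir(parents=True, exist_ok=True)
        return path.parent, path
    # e.g. "out/run1": treated as a directory,
    # aggregate metrics default to "out/run1/results.json"
    path.mkdir(parents=True, exist_ok=True)
    return path, path.joinpath("results.json")


print(resolve_output_path("out/run1"))
# on POSIX: (PosixPath('out/run1'), PosixPath('out/run1/results.json'))
```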
@@ -102,22 +159,14 @@ def main():
     batch_sizes = ",".join(map(str, results["config"]["batch_sizes"]))
     if args.output_path:
-        os.makedirs(os.path.dirname(args.output_path), exist_ok=True)
-        with open(args.output_path, "w") as f:
-            f.write(dumped)
+        output_path_file.open("w").write(dumped)
         if args.log_samples:
             for task_name, config in results["configs"].items():
                 output_name = "{}_{}".format(
                     re.sub("/", "__", args.model_args), task_name
                 )
-                if os.path.isdir(args.output_path):
-                    filename = f"./{args.output_path}/{output_name}.jsonl"
-                elif os.path.isfile(args.output_path):
-                    filename = (
-                        f"./{os.path.dirname(args.output_path)}/{output_name}.jsonl"
-                    )
+                filename = path.joinpath(f"{output_name}.jsonl")
                 with jsonlines.open(filename, "w") as f:
                     f.write_all(samples[task_name])
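A side note on the write in the final hunk: `output_path_file.open("w").write(dumped)` leaves closing the file handle to the garbage collector. A behaviorally equivalent sketch that scopes the handle explicitly, as the replaced `with open(...)` form did (the path and payload below are placeholders for values set earlier in `main()`):

```python
from pathlib import Path

output_path_file = Path("out/run1/results.json")  # placeholder; set by main()
dumped = "{}"  # placeholder for the serialized results string

output_path_file.parent.mkdir(parents=True, exist_ok=True)
with output_path_file.open("w") as f:  # handle is closed deterministically
    f.write(dumped)
```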