Merge pull request #480 from kenhktsui/float-limit

Evaluation Against Portion of Benchmark Data

Merge pull request #480 from kenhktsui/float-limit
Evaluation Against Portion of Benchmark Data
96a83d45 · Stella Biderman · GitHub · e53eb332 · 3fda1195 · 96a83d45
Unverified commit 96a83d45, authored May 21, 2023 by Stella Biderman; committed by GitHub on May 21, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 3 deletions

lm_eval/evaluator.py +4 -2

main.py +4 -1

No files found.
--- a/lm_eval/evaluator.py
+++ b/lm_eval/evaluator.py
@@ -42,8 +42,8 @@ def simple_evaluate(
        PyTorch device (e.g. "cpu" or "cuda:0") for running models
    :param no_cache: bool
        Whether or not to cache
-    :param limit: int, optional
-        Limit the number of examples per task (only use this for testing)
+    :param limit: int or float, optional
+        Limit the number of examples per task (only use this for testing), If <1, limit is a percentage of the total number of examples.
    :param bootstrap_iters:
        Number of iterations for bootstrap statistics
    :param description_dict: dict[str, str]
@@ -203,6 +203,8 @@ def evaluate(
            if description_dict and task_name in description_dict
            else ""
        )
+        if limit is not None:
+            limit = int(len(task_docs) * limit) if limit < 1.0 else int(limit)

        for doc_id, doc in enumerate(itertools.islice(task_docs, 0, limit)):


--- a/main.py
+++ b/main.py
@@ -36,7 +36,10 @@ def parse_args():
    parser.add_argument("--batch_size", type=str, default=None)
    parser.add_argument("--device", type=str, default=None)
    parser.add_argument("--output_path", default=None)
-    parser.add_argument("--limit", type=int, default=None)
+    parser.add_argument("--limit", type=float, default=None,
+                        help="Limit the number of examples per task. "
+                             "If <1, limit is a percentage of the total number of examples.")
+    parser.add_argument("--data_sampling", type=float, default=None)
    parser.add_argument("--no_cache", action="store_true")
    parser.add_argument("--decontamination_ngrams_path", default=None)
    parser.add_argument("--description_dict_path", default=None)