help="Path to config with all arguments for `lm-eval`",
help="Path to config with all arguments for `lm-eval`",
)
)
parser.add_argument(
self._parser.add_argument(
"--model",
"--model",
"-m",
"-m",
type=str,
type=str,
default="hf",
default="hf",
help="Name of model. Default 'hf'",
help="Name of model. Default 'hf'",
)
)
parser.add_argument(
self._parser.add_argument(
"--tasks",
"--tasks",
"-t",
"-t",
default=None,
default=None,
...
@@ -61,14 +69,14 @@ Examples:
...
@@ -61,14 +69,14 @@ Examples:
metavar="task1,task2",
metavar="task1,task2",
help="Comma-separated list of task names or task groupings to evaluate on.\nTo get full list of tasks, use one of the commands `lm-eval --tasks {{list_groups,list_subtasks,list_tags,list}}` to list out all available names for task groupings; only (sub)tasks; tags; or all of the above",
help="Comma-separated list of task names or task groupings to evaluate on.\nTo get full list of tasks, use one of the commands `lm-eval --tasks {{list_groups,list_subtasks,list_tags,list}}` to list out all available names for task groupings; only (sub)tasks; tags; or all of the above",
)
)
parser.add_argument(
self._parser.add_argument(
"--model_args",
"--model_args",
"-a",
"-a",
default=None,
default=None,
type=try_parse_json,
type=try_parse_json,
help="""Comma separated string or JSON formatted arguments for model, e.g. `pretrained=EleutherAI/pythia-160m,dtype=float32` or '{"pretrained":"EleutherAI/pythia-160m","dtype":"float32"}'.""",
help="""Comma separated string or JSON formatted arguments for model, e.g. `pretrained=EleutherAI/pythia-160m,dtype=float32` or '{"pretrained":"EleutherAI/pythia-160m","dtype":"float32"}'.""",
)
)
parser.add_argument(
self._parser.add_argument(
"--num_fewshot",
"--num_fewshot",
"-f",
"-f",
type=int,
type=int,
...
@@ -76,7 +84,7 @@ Examples:
...
@@ -76,7 +84,7 @@ Examples:
metavar="N",
metavar="N",
help="Number of examples in few-shot context",
help="Number of examples in few-shot context",
)
)
parser.add_argument(
self._parser.add_argument(
"--batch_size",
"--batch_size",
"-b",
"-b",
type=str,
type=str,
...
@@ -84,20 +92,20 @@ Examples:
...
@@ -84,20 +92,20 @@ Examples:
metavar="auto|auto:N|N",
metavar="auto|auto:N|N",
help="Acceptable values are 'auto', 'auto:N' (recompute batchsize N times with time) or N, where N is an integer. Default 1.",
help="Acceptable values are 'auto', 'auto:N' (recompute batchsize N times with time) or N, where N is an integer. Default 1.",
)
)
parser.add_argument(
self._parser.add_argument(
"--max_batch_size",
"--max_batch_size",
type=int,
type=int,
default=None,
default=None,
metavar="N",
metavar="N",
help="Maximal batch size to try with --batch_size auto.",
help="Maximal batch size to try with --batch_size auto.",
)
)
parser.add_argument(
self._parser.add_argument(
"--device",
"--device",
type=str,
type=str,
default=None,
default=None,
help="Device to use (e.g. cuda, cuda:0, cpu). Model defaults. Default None.",
help="Device to use (e.g. cuda, cuda:0, cpu). Model defaults. Default None.",
)
)
parser.add_argument(
self._parser.add_argument(
"--output_path",
"--output_path",
"-o",
"-o",
default=None,
default=None,
...
@@ -105,7 +113,7 @@ Examples:
...
@@ -105,7 +113,7 @@ Examples:
metavar="DIR|DIR/file.json",
metavar="DIR|DIR/file.json",
help="Path where result metrics will be saved. Can be either a directory or a .json file. If the path is a directory and log_samples is true, the results will be saved in the directory. Else the parent directory will be used.",
help="Path where result metrics will be saved. Can be either a directory or a .json file. If the path is a directory and log_samples is true, the results will be saved in the directory. Else the parent directory will be used.",
)
)
parser.add_argument(
self._parser.add_argument(
"--limit",
"--limit",
"-L",
"-L",
type=float,
type=float,
...
@@ -114,7 +122,7 @@ Examples:
...
@@ -114,7 +122,7 @@ Examples:
help="Limit the number of examples per task. "
help="Limit the number of examples per task. "
"If <1, limit is a percentage of the total number of examples.",
"If <1, limit is a percentage of the total number of examples.",
)
)
parser.add_argument(
self._parser.add_argument(
"--samples",
"--samples",
"-E",
"-E",
default=None,
default=None,
...
@@ -122,7 +130,7 @@ Examples:
...
@@ -122,7 +130,7 @@ Examples:
metavar="/path/to/json",
metavar="/path/to/json",
help='JSON string or path to JSON file containing doc indices of selected examples to test. Format: {"task_name":[indices],...}',
help='JSON string or path to JSON file containing doc indices of selected examples to test. Format: {"task_name":[indices],...}',
)
)
parser.add_argument(
self._parser.add_argument(
"--use_cache",
"--use_cache",
"-c",
"-c",
type=str,
type=str,
...
@@ -130,40 +138,40 @@ Examples:
...
@@ -130,40 +138,40 @@ Examples:
metavar="DIR",
metavar="DIR",
help="A path to a sqlite db file for caching model responses. `None` if not caching.",
help="A path to a sqlite db file for caching model responses. `None` if not caching.",
)
)
parser.add_argument(
self._parser.add_argument(
"--cache_requests",
"--cache_requests",
type=request_caching_arg_to_dict,
type=request_caching_arg_to_dict,
default=None,
default=None,
choices=["true","refresh","delete"],
choices=["true","refresh","delete"],
help="Speed up evaluation by caching the building of dataset requests. `None` if not caching.",
help="Speed up evaluation by caching the building of dataset requests. `None` if not caching.",
)
)
parser.add_argument(
self._parser.add_argument(
"--check_integrity",
"--check_integrity",
action="store_true",
action="store_true",
default=argparse.SUPPRESS,
default=argparse.SUPPRESS,
help="Whether to run the relevant part of the test suite for the tasks.",
help="Whether to run the relevant part of the test suite for the tasks.",
)
)
parser.add_argument(
self._parser.add_argument(
"--write_out",
"--write_out",
"-w",
"-w",
action="store_true",
action="store_true",
default=argparse.SUPPRESS,
default=argparse.SUPPRESS,
help="Prints the prompt for the first few documents.",
help="Prints the prompt for the first few documents.",
)
)
parser.add_argument(
self._parser.add_argument(
"--log_samples",
"--log_samples",
"-s",
"-s",
action="store_true",
action="store_true",
default=argparse.SUPPRESS,
default=argparse.SUPPRESS,
help="If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis. Use with --output_path.",
help="If True, write out all model outputs and documents for per-sample measurement and post-hoc analysis. Use with --output_path.",
)
)
parser.add_argument(
self._parser.add_argument(
"--system_instruction",
"--system_instruction",
type=str,
type=str,
default=None,
default=None,
help="System instruction to be used in the prompt",
help="System instruction to be used in the prompt",
)
)
parser.add_argument(
self._parser.add_argument(
"--apply_chat_template",
"--apply_chat_template",
type=str,
type=str,
nargs="?",
nargs="?",
...
@@ -176,26 +184,26 @@ Examples:
...
@@ -176,26 +184,26 @@ Examples:
"E.g. `--apply_chat_template template_name`"
"E.g. `--apply_chat_template template_name`"
),
),
)
)
parser.add_argument(
self._parser.add_argument(
"--fewshot_as_multiturn",
"--fewshot_as_multiturn",
action="store_true",
action="store_true",
default=argparse.SUPPRESS,
default=argparse.SUPPRESS,
help="If True, uses the fewshot as a multi-turn conversation",
help="If True, uses the fewshot as a multi-turn conversation",
)
)
parser.add_argument(
self._parser.add_argument(
"--show_config",
"--show_config",
action="store_true",
action="store_true",
default=argparse.SUPPRESS,
default=argparse.SUPPRESS,
help="If True, shows the the full config of all tasks at the end of the evaluation.",
help="If True, shows the the full config of all tasks at the end of the evaluation.",
)
)
parser.add_argument(
self._parser.add_argument(
"--include_path",
"--include_path",
type=str,
type=str,
default=None,
default=None,
metavar="DIR",
metavar="DIR",
help="Additional path to include if there are external tasks to include.",
help="Additional path to include if there are external tasks to include.",
)
)
parser.add_argument(
self._parser.add_argument(
"--gen_kwargs",
"--gen_kwargs",
type=try_parse_json,
type=try_parse_json,
default=None,
default=None,
...
@@ -204,7 +212,7 @@ Examples:
...
@@ -204,7 +212,7 @@ Examples:
""" e.g. '{"do_sample": True, temperature":0.7,"until":["hello"]}' or temperature=0,top_p=0.1."""
""" e.g. '{"do_sample": True, temperature":0.7,"until":["hello"]}' or temperature=0,top_p=0.1."""
),
),
)
)
parser.add_argument(
self._parser.add_argument(
"--verbosity",
"--verbosity",
"-v",
"-v",
type=str.upper,
type=str.upper,
...
@@ -212,25 +220,25 @@ Examples:
...
@@ -212,25 +220,25 @@ Examples:
metavar="CRITICAL|ERROR|WARNING|INFO|DEBUG",
metavar="CRITICAL|ERROR|WARNING|INFO|DEBUG",
help="(Deprecated) Controls logging verbosity level. Use the `LOGLEVEL` environment variable instead. Set to DEBUG for detailed output when testing or adding new task configurations.",
help="(Deprecated) Controls logging verbosity level. Use the `LOGLEVEL` environment variable instead. Set to DEBUG for detailed output when testing or adding new task configurations.",
)
)
parser.add_argument(
self._parser.add_argument(
"--wandb_args",
"--wandb_args",
type=str,
type=str,
default=argparse.SUPPRESS,
default=argparse.SUPPRESS,
help="Comma separated string arguments passed to wandb.init, e.g. `project=lm-eval,job_type=eval`",
help="Comma separated string arguments passed to wandb.init, e.g. `project=lm-eval,job_type=eval`",
)
)
parser.add_argument(
self._parser.add_argument(
"--wandb_config_args",
"--wandb_config_args",
type=str,
type=str,
default=argparse.SUPPRESS,
default=argparse.SUPPRESS,
help="Comma separated string arguments passed to wandb.config.update. Use this to trace parameters that aren't already traced by default. eg. `lr=0.01,repeats=3`",
help="Comma separated string arguments passed to wandb.config.update. Use this to trace parameters that aren't already traced by default. eg. `lr=0.01,repeats=3`",
)
)
parser.add_argument(
self._parser.add_argument(
"--hf_hub_log_args",
"--hf_hub_log_args",
type=str,
type=str,
default=argparse.SUPPRESS,
default=argparse.SUPPRESS,
help="Comma separated string arguments passed to Hugging Face Hub's log function, e.g. `hub_results_org=EleutherAI,hub_repo_name=lm-eval-results`",
help="Comma separated string arguments passed to Hugging Face Hub's log function, e.g. `hub_results_org=EleutherAI,hub_repo_name=lm-eval-results`",
)
)
parser.add_argument(
self._parser.add_argument(
"--predict_only",
"--predict_only",
"-x",
"-x",
action="store_true",
action="store_true",
...
@@ -238,7 +246,7 @@ Examples:
...
@@ -238,7 +246,7 @@ Examples:
help="Use with --log_samples. Only model outputs will be saved and metrics will not be evaluated.",
help="Use with --log_samples. Only model outputs will be saved and metrics will not be evaluated.",