Commit 768f55b3 authored by Baber's avatar Baber
Browse files

fix help

parent c59d4e2a
...@@ -46,37 +46,51 @@ class Run(SubCommand): ...@@ -46,37 +46,51 @@ class Run(SubCommand):
def _add_args(self) -> None:
    """Register all `lm-eval run` CLI arguments on ``self._parser``.

    Arguments are organized into named ``argparse`` groups (configuration,
    model and tasks, evaluation settings, data and output, caching and
    performance, instruct formatting, task management, logging and tracking,
    advanced options) so ``--help`` output is sectioned and readable.

    Several flags use ``default=argparse.SUPPRESS`` so that, when not given
    on the command line, no attribute is set on the namespace at all —
    presumably so YAML-config values (see ``--config``) are not clobbered
    by argparse defaults; confirm against the config-merging code.
    """
    # NOTE(review): the original had a no-op `self._parser = self._parser`
    # self-assignment here; removed as dead code.

    # Configuration
    config_group = self._parser.add_argument_group("configuration")
    config_group.add_argument(
        "--config",
        "-C",
        default=None,
        type=str,
        metavar="YAML_PATH",
        help="Set initial arguments from YAML config",
    )

    # Model and Tasks
    model_group = self._parser.add_argument_group("model and tasks")
    model_group.add_argument(
        "--model",
        "-m",
        type=str,
        default="hf",
        metavar="MODEL_NAME",
        help="Model name (default: hf)",
    )
    model_group.add_argument(
        "--tasks",
        "-t",
        default=None,
        type=str,
        metavar="TASK1,TASK2",
        help=textwrap.dedent("""
            Comma-separated list of task names or groupings.
            Use 'lm-eval list tasks' to see all available tasks.
            """).strip(),
    )
    model_group.add_argument(
        "--model_args",
        "-a",
        default=None,
        # try_parse_json accepts either "key=val,..." pairs or a JSON object.
        type=try_parse_json,
        metavar="ARGS",
        help="Model arguments as 'key=val,key2=val2' or JSON string",
    )

    # Evaluation Settings
    eval_group = self._parser.add_argument_group("evaluation settings")
    eval_group.add_argument(
        "--num_fewshot",
        "-f",
        type=int,
        # NOTE(review): this line fell between diff hunks in the reviewed
        # source; default restored as None — confirm against upstream.
        default=None,
        metavar="N",
        help="Number of examples in few-shot context",
    )
    eval_group.add_argument(
        "--batch_size",
        "-b",
        type=str,
        default=argparse.SUPPRESS,
        metavar="auto|auto:N|N",
        # dedent() was a no-op on this single-line string; removed.
        help="Batch size: 'auto', 'auto:N' (auto-tune N times), or integer (default: 1)",
    )
    eval_group.add_argument(
        "--max_batch_size",
        type=int,
        default=None,
        metavar="N",
        help="Maximum batch size when using --batch_size auto",
    )
    eval_group.add_argument(
        "--device",
        type=str,
        default=None,
        metavar="DEVICE",
        help="Device to use (e.g. cuda, cuda:0, cpu, mps)",
    )
    eval_group.add_argument(
        "--gen_kwargs",
        type=try_parse_json,
        default=None,
        metavar="KWARGS",
        help="Generation arguments as 'key=val,key2=val2' or JSON string",
    )

    # Data and Output
    data_group = self._parser.add_argument_group("data and output")
    data_group.add_argument(
        "--output_path",
        "-o",
        default=None,
        type=str,
        metavar="OUTPUT_PATH",
        help="Output dir or json file for results (and samples)",
    )
    data_group.add_argument(
        "--log_samples",
        "-s",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Save all model outputs and documents for post-hoc analysis",
    )
    data_group.add_argument(
        "--limit",
        "-L",
        type=float,
        default=None,
        metavar="N|0.0-1.0",
        help="Limit examples per task (integer count or fraction)",
    )
    data_group.add_argument(
        "--samples",
        "-E",
        default=None,
        type=try_parse_json,
        metavar="JSON_FILE",
        # dedent() was a no-op on this single-line string; removed.
        help='JSON file with specific sample indices for inputs: {"task_name":[indices],...}. Incompatible with --limit.',
    )

    # Caching and Performance
    cache_group = self._parser.add_argument_group("caching and performance")
    cache_group.add_argument(
        "--use_cache",
        "-c",
        type=str,
        default=None,
        metavar="CACHE_DIR",
        help="SQLite database path for caching model outputs.",
    )
    cache_group.add_argument(
        "--cache_requests",
        type=request_caching_arg_to_dict,
        default=None,
        choices=["true", "refresh", "delete"],
        help="Cache dataset request building (true|refresh|delete)",
    )
    cache_group.add_argument(
        "--check_integrity",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Run task test suite validation",
    )

    # Prompt Formatting
    template_group = self._parser.add_argument_group("instruct formatting")
    template_group.add_argument(
        "--system_instruction",
        type=str,
        default=None,
        metavar="INSTRUCTION",
        help="Add custom system instruction.",
    )
    template_group.add_argument(
        "--apply_chat_template",
        type=str,
        # Bare flag (no value) means True via `const`; a value selects a
        # specific named template.
        nargs="?",
        const=True,
        default=argparse.SUPPRESS,
        metavar="TEMPLATE",
        help="Apply chat template to prompts (optional template name)",
    )
    template_group.add_argument(
        "--fewshot_as_multiturn",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Use fewshot examples as multi-turn conversation",
    )

    # Task Management
    task_group = self._parser.add_argument_group("task management")
    task_group.add_argument(
        "--include_path",
        type=str,
        default=None,
        metavar="TASK_DIR",
        help="Additional directory for external tasks",
    )

    # Logging and Tracking
    logging_group = self._parser.add_argument_group("logging and tracking")
    logging_group.add_argument(
        "--verbosity",
        "-v",
        # str.upper normalizes e.g. "debug" -> "DEBUG".
        type=str.upper,
        default=None,
        metavar="LEVEL",
        help="(Deprecated) Log level. Use LOGLEVEL env var instead",
    )
    logging_group.add_argument(
        "--write_out",
        "-w",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Print prompts for first few documents",
    )
    logging_group.add_argument(
        "--show_config",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Display full task configuration after evaluation",
    )
    logging_group.add_argument(
        "--wandb_args",
        type=str,
        default=argparse.SUPPRESS,
        metavar="ARGS",
        help="Weights & Biases init arguments (key=val,key2=val2)",
    )
    logging_group.add_argument(
        "--wandb_config_args",
        type=str,
        default=argparse.SUPPRESS,
        metavar="ARGS",
        help="Weights & Biases config arguments (key=val,key2=val2)",
    )
    logging_group.add_argument(
        "--hf_hub_log_args",
        type=str,
        default=argparse.SUPPRESS,
        metavar="ARGS",
        help="Hugging Face Hub logging arguments (key=val,key2=val2)",
    )

    # Advanced Options
    advanced_group = self._parser.add_argument_group("advanced options")
    advanced_group.add_argument(
        "--predict_only",
        "-x",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Save predictions only, skip metric computation",
    )
    default_seed_string = "0,1234,1234,1234"
    advanced_group.add_argument(
        "--seed",
        # Parses either one seed for all four RNGs or a 4-item list
        # (python, numpy, torch, fewshot); 'None' skips a slot.
        type=partial(_int_or_none_list_arg_type, 3, 4, default_seed_string),
        default=default_seed_string,
        metavar="SEED|S1,S2,S3,S4",
        help=textwrap.dedent(f"""
            Random seeds for python,numpy,torch,fewshot (default: {default_seed_string}).
            Use single integer for all, or comma-separated list of 4 values.
            Use 'None' to skip setting a seed. Example: --seed 42 or --seed 0,None,8,52
            """).strip(),
    )
    advanced_group.add_argument(
        "--trust_remote_code",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Allow executing remote code from Hugging Face Hub",
    )
    advanced_group.add_argument(
        "--confirm_run_unsafe_code",
        action="store_true",
        default=argparse.SUPPRESS,
        help="Confirm understanding of unsafe code execution risks",
    )
    advanced_group.add_argument(
        "--metadata",
        type=json.loads,
        default=None,
        metavar="JSON",
        # dedent() was a no-op on this single-line string; removed.
        help="JSON metadata for task configs (merged with model_args), required for some tasks such as RULER",
    )
def execute(self, args: argparse.Namespace) -> None: def execute(self, args: argparse.Namespace) -> None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment