pre-commit

be78dc7a · Baber · b7d3f0dd · be78dc7a · be78dc7a · be78dc7a
Commit be78dc7a authored Jul 04, 2025 by Baber
Showing with 26 additions and 26 deletions

lm_eval/_cli/eval.py lm_eval/_cli/eval.py +6 -6

lm_eval/_cli/list.py lm_eval/_cli/list.py +7 -7

lm_eval/_cli/run.py lm_eval/_cli/run.py +4 -4

lm_eval/_cli/validate.py lm_eval/_cli/validate.py +9 -9

No files found.
--- a/lm_eval/_cli/eval.py
+++ b/lm_eval/_cli/eval.py
@@ -18,24 +18,24 @@ class Eval:
                quick start:
                  # Basic evaluation
                  lm-eval run --model hf --model_args pretrained=gpt2 --tasks hellaswag
                  # List available tasks
                  lm-eval list tasks
                  # Validate task configurations
                  lm-eval validate --tasks hellaswag,arc_easy
                available commands:
                  run       Run the harness on specified tasks
                  list      List available tasks, groups, subtasks, or tags
                  validate  Validate task configurations and check for errors
                legacy compatibility:
                  The harness maintains backward compatibility with the original interface.
                  If no command is specified, 'run' is automatically inserted:
                  lm-eval --model hf --tasks hellaswag  # Equivalent to 'lm-eval run --model hf --tasks hellaswag'
                For documentation, visit: https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/interface.md
            """),
            formatter_class=argparse.RawDescriptionHelpFormatter,

--- a/lm_eval/_cli/list.py
+++ b/lm_eval/_cli/list.py
@@ -19,28 +19,28 @@ class List(SubCommand):
                examples:
                  # List all available tasks (includes groups, subtasks, and tags)
                  $ lm-eval list tasks
                  # List only task groups (like 'mmlu', 'glue', 'superglue')
                  $ lm-eval list groups
                  # List only individual subtasks (like 'mmlu_abstract_algebra')
                  $ lm-eval list subtasks
                  # Include external task definitions
                  $ lm-eval list tasks --include_path /path/to/external/tasks
                  # List tasks from multiple external paths
                  $ lm-eval list tasks --include_path "/path/to/tasks1:/path/to/tasks2"
                organization:
                  • Groups: Collections of tasks with an aggregated metric across subtasks (e.g., 'mmlu')
                  • Subtasks: Individual evaluation tasks (e.g., 'mmlu_anatomy', 'hellaswag')
                  • Tags: Similar to groups but with no aggregate metric (e.g., 'reasoning', 'knowledge', 'language')
                  • External Tasks: Custom tasks defined in external directories
                evaluation usage:
                  After listing tasks, use them with the run command!
                For more information, see the task configs defined in https://github.com/EleutherAI/lm-evaluation-harness/tree/main/lm_eval/tasks
            """),
            formatter_class=argparse.RawDescriptionHelpFormatter,

--- a/lm_eval/_cli/run.py
+++ b/lm_eval/_cli/run.py
@@ -27,16 +27,16 @@ class Run(SubCommand):
                examples:
                  # Basic evaluation with HuggingFace model
                  $ lm-eval run --model hf --model_args pretrained=gpt2 --tasks hellaswag
                  # Evaluate on multiple tasks with few-shot examples
                  $ lm-eval run --model vllm --model_args pretrained=EleutherAI/gpt-j-6B --tasks arc_easy,arc_challenge --num_fewshot 5
                  # Evaluation with custom generation parameters
                  $ lm-eval run --model hf --model_args pretrained=gpt2 --tasks lambada --gen_kwargs "temperature=0.8,top_p=0.95"
                  # Use configuration file
                  $ lm-eval run --config my_config.yaml --tasks mmlu
                For more information, see: https://github.com/EleutherAI/lm-evaluation-harness
            """),
            formatter_class=argparse.RawDescriptionHelpFormatter,

--- a/lm_eval/_cli/validate.py
+++ b/lm_eval/_cli/validate.py
@@ -20,19 +20,19 @@ class Validate(SubCommand):
                examples:
                  # Validate a single task
                  lm-eval validate --tasks hellaswag
                  # Validate multiple tasks
                  lm-eval validate --tasks arc_easy,arc_challenge,hellaswag
                  # Validate a task group
                  lm-eval validate --tasks mmlu
                  # Validate tasks with external definitions
                  lm-eval validate --tasks my_custom_task --include_path ./custom_tasks
                  # Validate tasks from multiple external paths
                  lm-eval validate --tasks custom_task1,custom_task2 --include_path "/path/to/tasks1:/path/to/tasks2"
                validation checks:
                  The validate command performs several checks:
                  • Task existence: Verifies all specified tasks are available
@@ -42,7 +42,7 @@ class Validate(SubCommand):
                  • Metric definitions: Verifies metric functions and aggregation methods
                  • Filter pipelines: Validates filter chains and their parameters
                  • Template rendering: Tests prompt templates with sample data
                task config files:
                  Tasks are defined using YAML configuration files with these key sections:
                  • task: Task name and metadata
@@ -52,7 +52,7 @@ class Validate(SubCommand):
                  • metric_list: List of evaluation metrics to compute
                  • output_type: Type of model output (loglikelihood, generate_until, etc.)
                  • filter_list: Post-processing filters for model outputs
                common errors:
                  • Missing required fields in YAML configuration
                  • Invalid dataset paths or missing dataset splits
@@ -61,13 +61,13 @@ class Validate(SubCommand):
                  • Invalid filter names or parameters
                  • Circular dependencies in task inheritance
                  • Missing external task files when using --include_path
                debugging tips:
                  • Use --include_path to test external task definitions
                  • Check task configuration files for syntax errors
                  • Verify dataset access and authentication if needed
                  • Use 'lm-eval list tasks' to see available tasks
                For task configuration guide, see: https://github.com/EleutherAI/lm-evaluation-harness/blob/main/docs/task_guide.md
            """),
            formatter_class=argparse.RawDescriptionHelpFormatter,