add docs

f3cfff61 · Baber · dbe4c391 · f3cfff61 · f3cfff61 · f3cfff61
Commit f3cfff61 authored Jul 04, 2025 by Baber
Showing 3 changed files with 29 additions and 13 deletions

configs/default_config.yaml configs/default_config.yaml +20 -8

lm_eval/_cli/run.py lm_eval/_cli/run.py +4 -4

lm_eval/config/evaluate_config.py lm_eval/config/evaluate_config.py +5 -1

No files found.
--- a/configs/default_config.yaml
+++ b/configs/default_config.yaml
-model: vllm
+# Language Model Evaluation Harness Configuration File
+#
+# This YAML configuration file allows you to specify evaluation parameters
+# instead of passing them as command-line arguments.
+#
+# Usage:
+#   $ lm_eval --config configs/default_config.yaml
+#
+# You can override any value in this config with command-line arguments:
+#   $ lm_eval --config configs/default_config.yaml --model_args pretrained=gpt2 --tasks mmlu
+#
+# All parameters are optional and have the same meaning as their CLI counterparts.
+
+model: hf
 model_args:
-  pretrained: Qwen/Qwen2.5-0.5B-Instruct
-  dtype: bfloat16
-  tensor_parallel_size: 1
-tasks: hellaswag,gsm8k
+  pretrained: EleutherAI/pythia-14m
+  dtype: float16
+tasks:
+  - hellaswag
+  - gsm8k
 batch_size: 1
 trust_remote_code: true
 log_samples: true
 output_path: ./test
-apply_chat_template: true
-fewshot_as_multiturn: true
-limit: 5
+limit: 10
--- a/lm_eval/_cli/run.py
+++ b/lm_eval/_cli/run.py
@@ -47,7 +47,7 @@ class Run(SubCommand):
    def _add_args(self) -> None:
        self._parser = self._parser

-        # Configuration
+        # Defaults are set in config/evaluate_config.py
        config_group = self._parser.add_argument_group("configuration")
        config_group.add_argument(
            "--config",
@@ -64,7 +64,7 @@ class Run(SubCommand):
            "--model",
            "-m",
            type=str,
-            default="hf",
+            default=None,
            metavar="MODEL_NAME",
            help="Model name (default: hf)",
        )
@@ -283,7 +283,7 @@ class Run(SubCommand):
        advanced_group.add_argument(
            "--seed",
            type=partial(_int_or_none_list_arg_type, 3, 4, default_seed_string),
-            default=default_seed_string,
+            default=None,
            metavar="SEED|S1,S2,S3,S4",
            help=textwrap.dedent(f"""
                Random seeds for python,numpy,torch,fewshot (default: {default_seed_string}).
@@ -309,7 +309,7 @@ class Run(SubCommand):
            default=None,
            metavar="JSON",
            help=textwrap.dedent(
-                "JSON metadata for task configs (merged with model_args), required for some tasks such as RULER"
+                """JSON metadata for task configs (merged with model_args), required for some tasks such as RULER"""
            ),
        )


--- a/lm_eval/config/evaluate_config.py
+++ b/lm_eval/config/evaluate_config.py
 import json
 import logging
+import textwrap
 from argparse import Namespace
 from dataclasses import asdict, dataclass, field
 from pathlib import Path
@@ -204,7 +205,7 @@ class EvaluatorConfig:
        config = asdict(cls())

        # Load and merge YAML config if provided
-        if hasattr(namespace, "config") and namespace.config:
+        if used_config := hasattr(namespace, "config") and namespace.config:
            config.update(cls._load_yaml_config(namespace.config))

        # Override with CLI args (only truthy values, exclude non-config args)
@@ -219,6 +220,8 @@ class EvaluatorConfig:

        # Create instance and validate
        instance = cls(**config)
+        if used_config:
+            print(textwrap.dedent(f"""{instance}"""))
        instance.validate_and_preprocess()

        return instance
@@ -252,6 +255,7 @@ class EvaluatorConfig:

        try:
            yaml_data = yaml.safe_load(config_file.read_text())
+            print(textwrap.dedent(f"""yaml: {yaml_data}"""))
        except yaml.YAMLError as e:
            raise ValueError(f"Invalid YAML in {config_path}: {e}")
        except (OSError, UnicodeDecodeError) as e: